diff --git a/.azure-pipelines/model-test-3x.yml b/.azure-pipelines/model-test-3x.yml
index 34fd0a54209..e6c6d1250ab 100644
--- a/.azure-pipelines/model-test-3x.yml
+++ b/.azure-pipelines/model-test-3x.yml
@@ -11,7 +11,7 @@ pr:
     - neural_compressor/common
     - neural_compressor/torch
     - neural_compressor/transformers
-    - examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only
+    - examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only
     - setup.py
     - requirements_pt.txt
     - .azure-pipelines/scripts/models
diff --git a/.azure-pipelines/model-test.yml b/.azure-pipelines/model-test.yml
index 59b7c92dea0..51a01399c4e 100644
--- a/.azure-pipelines/model-test.yml
+++ b/.azure-pipelines/model-test.yml
@@ -14,7 +14,7 @@ pr:
     - .azure-pipelines/model-test.yml
     - .azure-pipelines/template/docker-template.yml
     - .azure-pipelines/scripts/models
-    - examples/tensorflow/oob_models/quantization/ptq
+    - examples/deprecated/tensorflow/oob_models/quantization/ptq
     - .azure-pipelines/model-test.yml
     - .azure-pipelines/scripts/fwk_version.sh
     - .azure-pipelines/scripts/install_nc.sh
diff --git a/.azure-pipelines/scripts/models/env_setup.sh b/.azure-pipelines/scripts/models/env_setup.sh
index 65d2b09edd7..0182b2781c9 100644
--- a/.azure-pipelines/scripts/models/env_setup.sh
+++ b/.azure-pipelines/scripts/models/env_setup.sh
@@ -51,13 +51,13 @@ SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
 log_dir="/neural-compressor/.azure-pipelines/scripts/models"
 if [[ "${inc_new_api}" == "3x"* ]]; then
     pip install cmake==3.31.6
-    WORK_SOURCE_DIR="/neural-compressor/examples/3.x_api/${framework}"
+    WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
     git clone https://github.com/intel/intel-extension-for-transformers.git /itrex
     cd /itrex
     pip install -r requirements.txt
     pip install -v .
 else
-    WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
+    WORK_SOURCE_DIR="/neural-compressor/examples/deprecated/${framework}"
 fi

 $BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET
diff --git a/.azure-pipelines/scripts/models/run_model_trigger_common.sh b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
index 56424a5f0c6..bdaeb613c1d 100644
--- a/.azure-pipelines/scripts/models/run_model_trigger_common.sh
+++ b/.azure-pipelines/scripts/models/run_model_trigger_common.sh
@@ -58,9 +58,9 @@ function check_results() {
 log_dir="/neural-compressor/.azure-pipelines/scripts/models"
 SCRIPTS_PATH="/neural-compressor/.azure-pipelines/scripts/models"
 if [[ "${inc_new_api}" == "3x"* ]]; then
-    WORK_SOURCE_DIR="/neural-compressor/examples/3.x_api/${framework}"
-else
     WORK_SOURCE_DIR="/neural-compressor/examples/${framework}"
+else
+    WORK_SOURCE_DIR="/neural-compressor/examples/deprecated/${framework}"
 fi

 $BOLD_YELLOW && echo "processing ${framework}-${fwk_ver}-${model}" && $RESET
diff --git a/README.md b/README.md
index 362f47676cf..5b2f18d14d8 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ In particular, the tool provides the key features, typical examples, and open co
 * Support a wide range of Intel hardware such as [Intel Gaudi Al Accelerators](https://www.intel.com/content/www/us/en/products/details/processors/ai-accelerators/gaudi-overview.html), [Intel Core Ultra Processors](https://www.intel.com/content/www/us/en/products/details/processors/core-ultra.html), [Intel Xeon Scalable Processors](https://www.intel.com/content/www/us/en/products/details/processors/xeon/scalable.html), [Intel Xeon CPU Max Series](https://www.intel.com/content/www/us/en/products/details/processors/xeon/max-series.html), [Intel Data Center GPU Flex Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/flex-series.html), and [Intel Data Center GPU Max Series](https://www.intel.com/content/www/us/en/products/details/discrete-gpus/data-center-gpu/max-series.html) with extensive testing; support AMD CPU, ARM CPU, and NVidia GPU through ONNX Runtime with limited testing; support NVidia GPU for some WOQ algorithms like AutoRound and HQQ.
-* Validate popular LLMs such as [LLama2](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Falcon](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [GPT-J](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Bloom](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [OPT](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), and more than 10,000 broad models such as [Stable Diffusion](/examples/pytorch/nlp/huggingface_models/text-to-image/quantization), [BERT-Large](/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx), and [ResNet50](/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx) from popular model hubs such as [Hugging Face](https://huggingface.co/), [Torch Vision](https://pytorch.org/vision/stable/index.html), and [ONNX Model Zoo](https://github.com/onnx/models#models), with automatic [accuracy-driven](/docs/source/design.md#workflow) quantization strategies +* Validate popular LLMs such as [LLama2](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Falcon](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [GPT-J](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [Bloom](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), [OPT](/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm), and more than 10,000 broad models such as [Stable Diffusion](/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization), [BERT-Large](/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx), and [ResNet50](/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx) from popular model hubs such as [Hugging Face](https://huggingface.co/), [Torch Vision](https://pytorch.org/vision/stable/index.html), and [ONNX Model Zoo](https://github.com/onnx/models#models), with automatic [accuracy-driven](/docs/source/design.md#workflow) quantization strategies * Collaborate with cloud marketplaces such as [Google Cloud Platform](https://console.cloud.google.com/marketplace/product/bitnami-launchpad/inc-tensorflow-intel?project=verdant-sensor-286207), [Amazon Web Services](https://aws.amazon.com/marketplace/pp/prodview-yjyh2xmggbmga#pdp-support), and [Azure](https://azuremarketplace.microsoft.com/en-us/marketplace/apps/bitnami.inc-tensorflow-intel), software platforms such as [Alibaba Cloud](https://www.intel.com/content/www/us/en/developer/articles/technical/quantize-ai-by-oneapi-analytics-on-alibaba-cloud.html), [Tencent TACO](https://new.qq.com/rain/a/20221202A00B9S00) and [Microsoft Olive](https://github.com/microsoft/Olive), and open AI ecosystem such as [Hugging Face](https://huggingface.co/blog/intel), [PyTorch](https://pytorch.org/tutorials/recipes/intel_neural_compressor_for_pytorch.html), [ONNX](https://github.com/onnx/models#models), [ONNX Runtime](https://github.com/microsoft/onnxruntime), and [Lightning AI](https://github.com/Lightning-AI/lightning/blob/master/docs/source-pytorch/advanced/post_training_quantization.rst) diff --git a/docs/source/3x/PT_FP8Quant.md b/docs/source/3x/PT_FP8Quant.md index 22d878ad277..585c6486e56 100644 --- a/docs/source/3x/PT_FP8Quant.md +++ b/docs/source/3x/PT_FP8Quant.md @@ -91,7 +91,7 @@ During 
runtime, Intel Neural Compressor will detect hardware automatically and t ## Get Start with FP8 Quantization [Demo Usage](https://github.com/intel/neural-compressor?tab=readme-ov-file#getting-started) -[Computer vision example](../../../examples/3.x_api/pytorch/cv/fp8_quant) +[Computer vision example](../../../examples/pytorch/cv/fp8_quant) ## Optimum-habana LLM example ### Overview diff --git a/docs/source/3x/PT_MXQuant.md b/docs/source/3x/PT_MXQuant.md index 42e12d039a6..44dbd05e7d4 100644 --- a/docs/source/3x/PT_MXQuant.md +++ b/docs/source/3x/PT_MXQuant.md @@ -95,7 +95,7 @@ user_model = convert(model=user_model) ## Examples -- PyTorch [huggingface models](/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant) +- PyTorch [huggingface models](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant) ## Reference diff --git a/docs/source/benchmark.md b/docs/source/benchmark.md index 21a3e2c37cd..6bd2bf1ea7e 100644 --- a/docs/source/benchmark.md +++ b/docs/source/benchmark.md @@ -57,4 +57,4 @@ fit(model="./int8.pb", conf=conf, b_dataloader=eval_dataloader) ## Examples -Refer to the [Benchmark example](../../examples/helloworld/tf_example5). +Refer to the [Benchmark example](../../examples/deprecated/helloworld/tf_example5). diff --git a/docs/source/distillation.md b/docs/source/distillation.md index 7e2d6b063ff..04ba1ca8110 100644 --- a/docs/source/distillation.md +++ b/docs/source/distillation.md @@ -107,7 +107,7 @@ model = training_func_for_nc(model) eval_func(model) ``` -For Intermediate Layer Knowledge Distillation or Self Distillation, the only difference to above launcher code is that `distil_loss_conf` should be set accordingly as shown below. More detailed settings can be found in this [example](../../examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/run_glue_no_trainer.py#L510) for Intermediate Layer Knowledge Distillation and this [example](../../examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/main.py#L344) for Self Distillation. +For Intermediate Layer Knowledge Distillation or Self Distillation, the only difference to above launcher code is that `distil_loss_conf` should be set accordingly as shown below. More detailed settings can be found in this [example](../../examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/run_glue_no_trainer.py#L510) for Intermediate Layer Knowledge Distillation and this [example](../../examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/main.py#L344) for Self Distillation. ```python from neural_compressor.config import ( @@ -122,8 +122,8 @@ distil_loss_conf = IntermediateLayersKnowledgeDistillationLossConfig(layer_mappi distil_loss_conf = SelfKnowledgeDistillationLossConfig(layer_mappings=layer_mappings) ``` ## Examples -[Distillation PyTorch Examples](../../examples/README.md#distillation-1) +[Distillation PyTorch Examples](../../examples/deprecated/README.md#distillation-1)
-[Distillation TensorFlow Examples](../../examples/README.md#distillation) +[Distillation TensorFlow Examples](../../examples/deprecated/README.md#distillation)
[Distillation Examples Results](./validated_model_list.md#validated-knowledge-distillation-examples) diff --git a/docs/source/mixed_precision.md b/docs/source/mixed_precision.md index a3f5e2e44bd..edeb033e8f5 100644 --- a/docs/source/mixed_precision.md +++ b/docs/source/mixed_precision.md @@ -160,8 +160,8 @@ converted_model.save("./path/to/save/") ## Examples -- Quick started with [helloworld example](/examples/helloworld/tf_example3) -- PyTorch [ResNet18](/examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18) -- IPEX [DistilBERT base](/examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex) -- Tensorflow [ResNet50](/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision) -- ONNX Runtime [Bert base](/examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision) +- Quick started with [helloworld example](/examples/deprecated/helloworld/tf_example3) +- PyTorch [ResNet18](/examples/deprecated/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18) +- IPEX [DistilBERT base](/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex) +- Tensorflow [ResNet50](/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision) +- ONNX Runtime [Bert base](/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision) diff --git a/docs/source/mx_quantization.md b/docs/source/mx_quantization.md index d6b9659cfa9..99af3ea8d1d 100644 --- a/docs/source/mx_quantization.md +++ b/docs/source/mx_quantization.md @@ -118,7 +118,7 @@ user_model = quantize(model=user_model, quant_config=quant_config) ## Examples -- PyTorch [huggingface models](/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx) +- PyTorch [huggingface models](/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mx) ## Reference diff --git a/docs/source/orchestration.md b/docs/source/orchestration.md index 2128e28660e..12e13297159 100644 --- a/docs/source/orchestration.md +++ b/docs/source/orchestration.md @@ -90,4 +90,4 @@ model.save('./path/to/save') ## Examples -[Orchestration Examples](../../examples/README.md#orchestration) +[Orchestration Examples](../../examples/deprecated/README.md#orchestration) diff --git a/docs/source/pruning.md b/docs/source/pruning.md index 1d7b321ea40..4a94d868de1 100644 --- a/docs/source/pruning.md +++ b/docs/source/pruning.md @@ -91,7 +91,7 @@ Pruning patterns defines the rules of pruned weights' arrangements in space. Int An advantage of channel pruning is that in some particular structure(feed forward parts in Transformers etc.), pruned channels can be removed permanently from original weights without influencing other dense channels. Via this process, we can decrease these weights' size and obtain direct improvements of inference speed, without using hardware related optimization tools like [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers). - We name this process as **Model Auto Slim**(experimental feature) and currently we have validated that this process can significantly improve some popular transformer model's inference speed. Currently this method is under development and only supports some particular structures. Please refer more details of such method in this [model slim example](../../examples/pytorch/nlp/huggingface_models/question-answering/model_slim/). 
+ We name this process as **Model Auto Slim**(experimental feature) and currently we have validated that this process can significantly improve some popular transformer model's inference speed. Currently this method is under development and only supports some particular structures. Please refer more details of such method in this [model slim example](../../examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/). - Unstructured Pruning @@ -478,25 +478,25 @@ The pruning technique is validated on typical models across various domains (in - Text Classification - Sparsity is implemented in different pruning patterns of MRPC and SST-2 tasks [Text-classification examples](../../examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager). + Sparsity is implemented in different pruning patterns of MRPC and SST-2 tasks [Text-classification examples](../../examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager). - Question Answering - Multiple examples of sparse models were obtained on the SQuAD-v1.1 dataset [Question-answering examples](../../examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager). + Multiple examples of sparse models were obtained on the SQuAD-v1.1 dataset [Question-answering examples](../../examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager). - Language Translation (Experimental, sparsity 0.8, pattern 4x1, BLEU 25.63(dense) vs 24.35(sparse)) - Pruning Flan-T5-small model on English-Romanian translation task [Translation examples](../../examples/pytorch/nlp/huggingface_models/translation/pruning/eager). + Pruning Flan-T5-small model on English-Romanian translation task [Translation examples](../../examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager). - Object Detection (Experimental, sparsity 0.8, pattern 4x1, mAP 0.404(dense) vs 0.381(sparse)) - Pruning on YOLOv5 model using coco dataset [Object-detection examples](../../examples/pytorch/object_detection/yolo_v5/pruning/eager). + Pruning on YOLOv5 model using coco dataset [Object-detection examples](../../examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager). - Image Recognition (Experimental, sparsity 0.75, pattern 2x1, top1 acc 0.801(dense) vs 0.7895(sparse)) - Pruning on ResNet50 model using ImageNet dataset [Image-recognition examples](../../examples/pytorch/image_recognition/ResNet50/pruning/eager/). + Pruning on ResNet50 model using ImageNet dataset [Image-recognition examples](../../examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/). -Please refer to [pruning examples](../../examples/README.md#Pruning-1) for more information. +Please refer to [pruning examples](../../examples/deprecated/README.md#Pruning-1) for more information. ## Sparse Model Deployment diff --git a/docs/source/quantization.md b/docs/source/quantization.md index b3155fe1141..71c76f5049f 100644 --- a/docs/source/quantization.md +++ b/docs/source/quantization.md @@ -543,4 +543,4 @@ conf = PostTrainingQuantConfig(backend="itex", device="gpu") ## Examples User could refer to [examples](https://github.com/intel/neural-compressor/blob/master/examples/README.md) on how to quantize a new model. -If user wants to quantize an onnx model with npu, please refer to this [example](../../examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/README.md). 
If user wants to quantize a pytorch model with Intel GPU, please refer to this [example](../../examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/README.md). +If user wants to quantize an onnx model with npu, please refer to this [example](../../examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/README.md). If user wants to quantize a pytorch model with Intel GPU, please refer to this [example](../../examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/README.md). diff --git a/docs/source/quantization_layer_wise.md b/docs/source/quantization_layer_wise.md index 27218452fe1..a7ebdee3977 100644 --- a/docs/source/quantization_layer_wise.md +++ b/docs/source/quantization_layer_wise.md @@ -95,4 +95,4 @@ q_model = quantization.fit(fp32_model_path, conf, calib_dataloader=dataloader) q_model.save(int8_model_path) ``` -Refer to [ONNX Runtime llama-2 LWQ example](../../examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only) +Refer to [ONNX Runtime llama-2 LWQ example](../../examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only) diff --git a/docs/source/quantization_weight_only.md b/docs/source/quantization_weight_only.md index 13200d536c8..7534b69fb3c 100644 --- a/docs/source/quantization_weight_only.md +++ b/docs/source/quantization_weight_only.md @@ -177,7 +177,7 @@ q_model = quantization.fit(model, conf, eval_func=eval_func, calib_dataloader=da q_model.save("saved_results") ``` -Refer to this [link](../../examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only) for an example of WOQ algorithms tuning on ONNX Llama models. +Refer to this [link](../../examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only) for an example of WOQ algorithms tuning on ONNX Llama models. diff --git a/docs/source/smooth_quant.md b/docs/source/smooth_quant.md index 98f723913cc..2a54f967c49 100644 --- a/docs/source/smooth_quant.md +++ b/docs/source/smooth_quant.md @@ -375,7 +375,7 @@ A list of models that achieved a <1% accuracy drop is shown below. | databricks/dolly-v2-3b* | 0.6297 | 0.6247 | alpha=0.5, Ipex 2.1 | | tiiuae/falcon-7b-instruct | 0.6437 | 0.6392 | alpha=0.7, Pytorch | -The results listed below are achieved using IPEX optimize_transformers in model initialization for better performance. Please refer to the step-by-step [instruction](../../examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/ipex/README.md) for details. +The results listed below are achieved using IPEX optimize_transformers in model initialization for better performance. Please refer to the step-by-step [instruction](../../examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/ipex/README.md) for details. 
| Model/Last token accuracy | FP32 Accuracy | INT8 (w/ SmoothQuant) | Notes | |:----------:|:------:|:------:|-----------------------------------| | LLaMa-2-7b-hf* | 0.7392 | 0.7332 | alpha=Auto, Ipex 2.1 | diff --git a/docs/source/tuning_strategies.md b/docs/source/tuning_strategies.md index cf2cc9e3980..2822fafb5ba 100644 --- a/docs/source/tuning_strategies.md +++ b/docs/source/tuning_strategies.md @@ -495,7 +495,7 @@ To use Distributed Tuning, the number of processes should be specified to be gre ```shell mpirun -np ``` -An example of distributed tuning can be reached at [ptq_static_mrpc](../../examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx). +An example of distributed tuning can be reached at [ptq_static_mrpc](../../examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx). ## Customize a New Tuning Strategy diff --git a/examples/3.x_api/README.md b/examples/3.x_api/README.md deleted file mode 100644 index fd79f210533..00000000000 --- a/examples/3.x_api/README.md +++ /dev/null @@ -1,169 +0,0 @@ -# Examples - -Intel® Neural Compressor validated examples with multiple compression techniques, including quantization, pruning, knowledge distillation and orchestration. Part of the validated cases can be found in the example tables, and the release data is available [here](../docs/source/validated_model_list.md). - - -# PyTorch Examples - -## Quantization - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
| Model | Domain | Method | Examples |
|---|---|---|---|
| gpt_j | Natural Language Processing | Weight-Only Quantization | link |
| gpt_j | Natural Language Processing | Static Quantization (IPEX) | link |
| llama2_7b | Natural Language Processing | Weight-Only Quantization | link |
| llama2_7b | Natural Language Processing | Static Quantization (IPEX) | link |
| opt_125m | Natural Language Processing | Static Quantization (IPEX) | link |
| opt_125m | Natural Language Processing | Static Quantization (PT2E) | link |
| opt_125m | Natural Language Processing | Weight-Only Quantization | link |
| resnet18 | Image Recognition | Mixed Precision | link |
| resnet18 | Image Recognition | Static Quantization | link |

# TensorFlow Examples

## Quantization

| Model | Domain | Method | Examples |
|---|---|---|---|
| bert_large_squad_model_zoo | Natural Language Processing | Post-Training Static Quantization | link |
| transformer_lt | Natural Language Processing | Post-Training Static Quantization | link |
| inception_v3 | Image Recognition | Post-Training Static Quantization | link |
| mobilenetv2 | Image Recognition | Post-Training Static Quantization | link |
| resnetv2_50 | Image Recognition | Post-Training Static Quantization | link |
| vgg16 | Image Recognition | Post-Training Static Quantization | link |
| ViT | Image Recognition | Post-Training Static Quantization | link |
| GraphSage | Graph Networks | Post-Training Static Quantization | link |
| yolo_v5 | Object Detection | Post-Training Static Quantization | link |
| faster_rcnn_resnet50 | Object Detection | Post-Training Static Quantization | link |
| mask_rcnn_inception_v2 | Object Detection | Post-Training Static Quantization | link |
| ssd_mobilenet_v1 | Object Detection | Post-Training Static Quantization | link |
| wide_deep_large_ds | Recommendation | Post-Training Static Quantization | link |
| 3dunet-mlperf | Semantic Image Segmentation | Post-Training Static Quantization | link |
- diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt deleted file mode 100644 index 23c79d8bbd3..00000000000 --- a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -tensorflow==2.12 -transformers -datasets==2.17 -numpy \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt b/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt deleted file mode 100644 index 8e5871fe7fd..00000000000 --- a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -tensorflow==2.15 -datasets -transformers==4.52.1 \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt b/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt deleted file mode 100644 index d5069f8038f..00000000000 --- a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -nnunet -tensorflow \ No newline at end of file diff --git a/examples/README.md b/examples/README.md index 81349a1260d..66e85aa8f2e 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,977 +1,71 @@ -Examples -========== -Intel® Neural Compressor validated examples with multiple compression techniques, including quantization, pruning, knowledge distillation and orchestration. Part of the validated cases can be found in the example tables, and the release data is available [here](../docs/source/validated_model_list.md). +# Examples -> Note: `3.x_api` here is the target example folder for version >= 3.0, which contains pytorch and tensorflow related examples. TensorFlow and ONNX related examples here are no longer maintained. +Intel® Neural Compressor validated examples with multiple compression techniques, including quantization, pruning, knowledge distillation and orchestration. +# PyTorch Examples -# Quick Get Started Notebook Examples -* [Quick Get Started Notebook of Intel® Neural Compressor for ONNXRuntime (Deprecated)](/examples/notebook/onnxruntime/Quick_Started_Notebook_of_INC_for_ONNXRuntime.ipynb) - -* [Quick Get Started Notebook of Intel® Neural Compressor for Tensorflow (Deprecated)](/examples/notebook/tensorflow/resnet/resnet_quantization.ipynb) - -# Helloworld Examples -* [torch_llm](/examples/helloworld/torch_llm): apply the weight-only quantization to LLMs. -* [torch_non_llm](/examples/helloworld/torch_non_llm): apply the static quantization to non-LLMs. -* [tf_example1 (Deprecated)](/examples/helloworld/tf_example1): quantize with built-in dataloader and metric. -* [tf_example2 (Deprecated)](/examples/helloworld/tf_example2): quantize keras model with customized metric and dataloader. -* [tf_example3 (Deprecated)](/examples/helloworld/tf_example3): convert model with mix precision. -* [tf_example4 (Deprecated)](/examples/helloworld/tf_example4): quantize checkpoint with dummy dataloader. -* [tf_example5 (Deprecated)](/examples/helloworld/tf_example5): config performance and accuracy measurement. -* [tf_example6 (Deprecated)](/examples/helloworld/tf_example6): use default user-facing APIs to quantize a pb model. 
-* [tf_example7 (Deprecated)](/examples/helloworld/tf_example7): quantize and benchmark with pure python API. - -# TensorFlow Examples (Deprecated) -## Quantization - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ModelDomainApproachExamples
ResNet50 V1.0Image RecognitionPost-Training Static Quantizationpb
ResNet50 V1.5Image RecognitionPost-Training Static Quantizationpb / keras
ResNet101Image RecognitionPost-Training Static Quantizationpb / keras
MobileNet V1Image RecognitionPost-Training Static Quantizationpb
MobileNet V2Image RecognitionPost-Training Static Quantizationpb / keras
MobileNet V3Image RecognitionPost-Training Static Quantizationpb
Inception V1Image RecognitionPost-Training Static Quantizationpb
Inception V2Image RecognitionPost-Training Static Quantizationpb
Inception V3Image RecognitionPost-Training Static Quantizationpb / keras
Inception V4Image RecognitionPost-Training Static Quantizationpb
Inception ResNet V2Image RecognitionPost-Training Static Quantizationpb / keras
VGG16Image RecognitionPost-Training Static Quantizationpb / keras
VGG19Image RecognitionPost-Training Static Quantizationpb / keras
ResNet V2 50Image RecognitionPost-Training Static Quantizationpb / keras
ResNet V2 101Image RecognitionPost-Training Static Quantizationpb / keras
ResNet V2 152Image RecognitionPost-Training Static Quantizationpb
DenseNet121Image RecognitionPost-Training Static Quantizationpb
DenseNet161Image RecognitionPost-Training Static Quantizationpb
DenseNet169Image RecognitionPost-Training Static Quantizationpb
EfficientNet B0Image RecognitionPost-Training Static Quantizationckpt
XceptionImage RecognitionPost-Training Static Quantizationkeras
ResNet V2Image RecognitionQuantization-Aware Trainingkeras
EfficientNet V2 B0Image RecognitionPost-Training Static QuantizationSavedModel
BERT base MRPCNatural Language ProcessingPost-Training Static Quantizationckpt
BERT large SQuAD (Model Zoo)Natural Language ProcessingPost-Training Static Quantizationpb
BERT large SQuADNatural Language ProcessingPost-Training Static Quantizationpb
DistilBERT baseNatural Language ProcessingPost-Training Static Quantizationpb
Transformer LTNatural Language ProcessingPost-Training Static Quantizationpb
Transformer LT MLPerfNatural Language ProcessingPost-Training Static Quantizationpb
SSD ResNet50 V1Object DetectionPost-Training Static Quantizationpb / ckpt
SSD MobileNet V1Object DetectionPost-Training Static Quantizationpb / ckpt
Faster R-CNN Inception ResNet V2Object DetectionPost-Training Static Quantizationpb / SavedModel
Faster R-CNN ResNet101Object DetectionPost-Training Static Quantizationpb / SavedModel
Faster R-CNN ResNet50Object DetectionPost-Training Static Quantizationpb
Mask R-CNN Inception V2Object DetectionPost-Training Static Quantizationpb / ckpt
SSD ResNet34Object DetectionPost-Training Static Quantizationpb
YOLOv3Object DetectionPost-Training Static Quantizationpb
Wide & DeepRecommendationPost-Training Static Quantizationpb
Arbitrary Style TransferStyle TransferPost-Training Static Quantizationckpt
OPTNatural Language ProcessingPost-Training Static Quantizationpb (smooth quant)
GPT2Natural Language ProcessingPost-Training Static Quantizationpb (smooth quant)
ViTImage RecognitionPost-Training Static Quantizationpb
GraphSageGraph NetworksPost-Training Static Quantizationpb
EleutherAI/gpt-j-6BNatural Language ProcessingPost-Training Static Quantizationsaved_model (smooth quant)
- -## Distillation - - - - - - - - - - - - - - - - - - - -
Student ModelTeacher ModelDomainApproachExamples
MobileNetDenseNet201Image RecognitionKnowledge Distillationpb
- -## Pruning - - - - - - - - - - - - - - - - - - - - - - - -
ModelDomainApproachExamples
ResNet V2Image RecognitionStructured (4x1, 2in4)keras
ViTImage RecognitionStructured (4x1, 2in4)keras
- -## Model Export - - - - - - - - - - - - - - - - - -
ModelDomainApproachExamples
ResNet50 V1.5Image RecognitionTF2ONNXint8 fp32
- -# PyTorch Examples -## Quantization - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ModelDomainApproach Examples
ResNet18Image RecognitionPost-Training Static Quantizationfx / ipex
ResNet18Image RecognitionQuantization-Aware Trainingfx
ResNet50Image RecognitionPost-Training Static Quantizationfx / ipex
ResNet50Image RecognitionQuantization-Aware Trainingfx
ResNeXt101_32x16d_wslImage RecognitionPost-Training Static Quantizationipex
ResNeXt101_32x8dImage RecognitionPost-Training Static Quantizationfx
Se_ResNeXt50_32x4dImage RecognitionPost-Training Static Quantizationfx
Inception V3Image RecognitionPost-Training Static Quantizationfx
MobileNet V2Image RecognitionPost-Training Static Quantizationfx
PeleeNetImage RecognitionPost-Training Static Quantizationfx
ResNeSt50Image RecognitionPost-Training Static Quantizationfx
3D-UNetImage RecognitionPost-Training Static Quantizationfx
SSD ResNet34Object DetectionPost-Training Static Quantizationfx / ipex
YOLOv3Object DetectionPost-Training Static Quantizationfx
Mask R-CNNObject DetectionPost-Training Static Quantizationfx
DLRMRecommendationPost-Training Static Quantizationipex / fx
HuBERTSpeech RecognitionPost-Training Static Quantizationfx
HuBERTSpeech RecognitionPost-Training Dynamic Quantizationfx
BlendCNNNatural Language ProcessingPost-Training Static Quantizationipex
bert-large-uncased-whole-word-masking-finetuned-squadNatural Language ProcessingPost-Training Static Quantizationfx / ipex(Intel GPU)
distilbert-base-uncased-distilled-squadNatural Language ProcessingPost-Training Static Quantizationipex
yoshitomo-matsubara/bert-large-uncased-rteNatural Language ProcessingPost-Training Dynamic Quantizationfx
Intel/xlm-roberta-base-mrpcNatural Language ProcessingPost-Training Dynamic Quantizationfx
textattack/distilbert-base-uncased-MRPCNatural Language ProcessingPost-Training Dynamic Quantizationfx
textattack/albert-base-v2-MRPCNatural Language ProcessingPost-Training Dynamic Quantizationfx
Intel/xlm-roberta-base-mrpcNatural Language ProcessingPost-Training Static Quantizationfx
yoshitomo-matsubara/bert-large-uncased-rteNatural Language ProcessingPost-Training Static Quantizationfx
Intel/bert-base-uncased-mrpcNatural Language ProcessingPost-Training Static Quantizationfx
textattack/bert-base-uncased-CoLANatural Language ProcessingPost-Training Static Quantizationfx
textattack/bert-base-uncased-STS-BNatural Language ProcessingPost-Training Static Quantizationfx
gchhablani/bert-base-cased-finetuned-sst2Natural Language ProcessingPost-Training Static Quantizationfx
ModelTC/bert-base-uncased-rteNatural Language ProcessingPost-Training Static Quantizationfx
textattack/bert-base-uncased-QNLINatural Language ProcessingPost-Training Static Quantizationfx
yoshitomo-matsubara/bert-large-uncased-colaNatural Language ProcessingPost-Training Static Quantizationfx
textattack/distilbert-base-uncased-MRPCNatural Language ProcessingPost-Training Static Quantizationfx
Intel/xlnet-base-cased-mrpcNatural Language ProcessingPost-Training Static Quantizationfx
textattack/roberta-base-MRPCNatural Language ProcessingPost-Training Static Quantizationfx
Intel/camembert-base-mrpcNatural Language ProcessingPost-Training Static Quantizationfx
t5-smallNatural Language ProcessingPost-Training Dynamic Quantizationfx
Helsinki-NLP/opus-mt-en-roNatural Language ProcessingPost-Training Dynamic Quantizationfx
lvwerra/pegasus-samsumNatural Language ProcessingPost-Training Dynamic Quantizationfx
google/reformer-crime-and-punishmentNatural Language ProcessingPost-Training Static Quantizationfx
EleutherAI/gpt-j-6BNatural Language ProcessingPost-Training Static Quantizationfx / smooth quant
EleutherAI/gpt-j-6BNatural Language ProcessingPost-Training Weight Only Quantizationweight_only
abeja/gpt-neox-japanese-2.7bNatural Language ProcessingPost-Training Static Quantizationfx
bigscience/bloomNatural Language ProcessingPost-Training Static Quantizationsmooth quant
facebook/optNatural Language ProcessingPost-Training Static Quantizationsmooth quant
SD DiffusionText to ImagePost-Training Static Quantizationfx
openai/whisper-largeSpeech RecognitionPost-Training Dynamic Quantizationfx
torchaudio/wav2vec2Speech RecognitionPost-Training Dynamic Quantizationfx
- -## Quantization with [Intel® Extension for Transformers](https://github.com/intel/intel-extension-for-transformers) based on Intel® Neural Compressor - - - - - - - - - - - - - - - - - - - - - - - - - -
ModelDomainApproach Examples
T5 LargeNatural Language ProcessingPost-Training Dynamic Quantizationfx
Flan T5 LargeNatural Language ProcessingPost-Training Dynamic / Static Quantizationfx
- -## Pruning - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
ModelDomainPruning Type ApproachExamples
Distilbert-base-uncasedNatural Language Processing (text classification)Structured (4x1, 2in4), UnstructuredSnip-momentumeager
Bert-miniNatural Language Processing (text classification)Structured (4x1, 2in4, per channel), UnstructuredSnip-momentumeager
Distilbert-base-uncasedNatural Language Processing (question answering)Structured (4x1, 2in4), UnstructuredSnip-momentumeager
Bert-miniNatural Language Processing (question answering)Structured (4x1, 2in4), UnstructuredSnip-momentumeager
Bert-base-uncasedNatural Language Processing (question answering)Structured (4x1, 2in4), UnstructuredSnip-momentumeager
Bert-largeNatural Language Processing (question answering)Structured (4x1, 2in4), UnstructuredSnip-momentumeager
Flan-T5-smallNatural Language Processing (translation)Structured (4x1)Snip-momentumeager
YOLOv5s6Object DetectionStructured (4x1, 2in4), UnstructuredSnip-momentumeager
ResNet50Image RecognitionStructured (2x1)Snip-momentumeager
Bert-baseQuestion AnsweringStructured (channel, multi-head attention)Snip-momentumeager
Bert-largeQuestion AnsweringStructured (channel, multi-head attention)Snip-momentumeager
- -## Distillation - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Student ModelTeacher ModelDomainApproachExamples
CNN-2CNN-10Image RecognitionKnowledge Distillationeager
MobileNet V2-0.35WideResNet40-2Image RecognitionKnowledge Distillationeager
ResNet18|ResNet34|ResNet50|ResNet101ResNet18|ResNet34|ResNet50|ResNet101Image RecognitionKnowledge Distillationeager
ResNet18|ResNet34|ResNet50|ResNet101ResNet18|ResNet34|ResNet50|ResNet101Image RecognitionSelf Distillationeager
VGG-8VGG-13Image RecognitionKnowledge Distillationeager
BlendCNNBERT-BaseNatural Language ProcessingKnowledge Distillationeager
DistilBERTBERT-BaseNatural Language ProcessingKnowledge Distillationeager
BiLSTMRoBERTa-BaseNatural Language ProcessingKnowledge Distillationeager
TinyBERTBERT-BaseNatural Language ProcessingKnowledge Distillationeager
BERT-3BERT-BaseNatural Language ProcessingKnowledge Distillationeager
DistilRoBERTaRoBERTa-LargeNatural Language ProcessingKnowledge Distillationeager
- -## Orchestration +## Quantization - + - - - - - - - - - - - - - - - - - - - - - - - - - -
Model DomainApproachMethod Examples
ResNet50Image RecognitionMulti-shot: Pruning and PTQ
link
ResNet50Image RecognitionOne-shot: QAT during Pruning
link
Intel/bert-base-uncased-sparse-90-unstructured-pruneofaNatural Language Processing (question-answering)One-shot: Pruning, Distillation and QAT
link
Intel/bert-base-uncased-sparse-90-unstructured-pruneofaNatural Language Processing (text-classification)One-shot: Pruning, Distillation and QAT
link
-## Model Export - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModelDomainApproachExamples
ResNet18Image RecognitionPT2ONNXint8 fp32
ResNet50Image RecognitionPT2ONNXint8 fp32
bert base MRPCNatural Language ProcessingPT2ONNXint8 fp32
bert large MRPCNatural Language ProcessingPT2ONNXint8 fp32
gpt_jNatural Language ProcessingWeight-Only Quantizationlink
Static Quantization (IPEX)link
llama2_7bNatural Language ProcessingWeight-Only Quantizationlink
Static Quantization (IPEX)link
opt_125mNatural Language ProcessingStatic Quantization (IPEX)link
Static Quantization (PT2E)link
Weight-Only Quantizationlink
resnet18Image RecognitionMixed Precisionlink
Static Quantizationlink
-# ONNX Runtime Examples (Deprecated) + +# TensorFlow Examples + ## Quantization @@ -979,377 +73,96 @@ Intel® Neural Compressor validated examples with multiple compression technique - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + - - - - - + + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - + + + + - - - - - + + + + + - - - - - + + + + + - - - - - - - - - - - - - - + + +
Model DomainApproach Method Examples
ResNet50 V1.5Image RecognitionPost-Training Static Quantizationqlinearops / qdq
ResNet50 V1.5 MLPerfImage RecognitionPost-Training Static Quantizationqlinearops / qdq
VGG16Image RecognitionPost-Training Static Quantizationqlinearops / qdq
MobileNet V2Image RecognitionPost-Training Static Quantizationqlinearops / qdq
MobileNet V3 MLPerfImage RecognitionPost-Training Static Quantizationqlinearops / qdq
AlexNet (ONNX Model Zoo)Image RecognitionPost-Training Static Quantizationqlinearops / qdq
CaffeNet (ONNX Model Zoo)Image RecognitionPost-Training Static Quantizationqlinearops / qdq
DenseNet (ONNX Model Zoo)Image RecognitionPost-Training Static Quantizationqlinearops
EfficientNet (ONNX Model Zoo)Image RecognitionPost-Training Static Quantizationqlinearops / qdq
FCN (ONNX Model Zoo)Image RecognitionPost-Training Static Quantizationqlinearops / qdq
GoogleNet (ONNX Model Zoo)Image RecognitionPost-Training Static Quantizationqlinearops / qdq
Inception V1 (ONNX Model Zoo)Image RecognitionPost-Training Static Quantizationqlinearops / qdq
MNIST (ONNX Model Zoo)Image RecognitionPost-Training Static Quantizationqlinearops
MobileNet V2 (ONNX Model Zoo)Image RecognitionPost-Training Static Quantizationqlinearops / qdq
ResNet50 V1.5 (ONNX Model Zoo)Image Recognition
bert_large_squad_model_zooNatural Language Processing Post-Training Static Quantizationqlinearops / qdq
ShuffleNet V2 (ONNX Model Zoo)Image Recognitionlink
transformer_ltNatural Language Processing Post-Training Static Quantizationqlinearops / qdq
SqueezeNet (ONNX Model Zoo)link
inception_v3 Image Recognition Post-Training Static Quantizationqlinearops / qdq
VGG16 (ONNX Model Zoo)link
mobilenetv2 Image Recognition Post-Training Static Quantizationqlinearops / qdq
ZFNet (ONNX Model Zoo)link
resnetv2_50 Image Recognition Post-Training Static Quantizationqlinearops / qdq
ArcFace (ONNX Model Zoo)link
vgg16 Image Recognition Post-Training Static Quantizationqlinearops
BEiTlink
ViT Image Recognition Post-Training Static Quantizationqlinearops
BERT base MRPCNatural Language ProcessingPost-Training Static Quantizationintegerops / qdq
BERT base MRPCNatural Language ProcessingPost-Training Dynamic Quantizationintegerops
DistilBERT base MRPCNatural Language ProcessingPost-Training Dynamic / Static Quantizationintegerops / qdq
Mobile bert MRPCNatural Language ProcessingPost-Training Dynamic / Static Quantizationintegerops / qdq
Roberta base MRPCNatural Language ProcessingPost-Training Dynamic / Static Quantizationintegerops / qdq
BERT SQuAD (ONNX Model Zoo)Natural Language ProcessingPost-Training Dynamic Quantizationintegerops
GPT2 lm head WikiText (ONNX Model Zoo)Natural Language ProcessingPost-Training Dynamic Quantizationintegerops
MobileBERT SQuAD MLPerf (ONNX Model Zoo)Natural Language ProcessingPost-Training Dynamic / Static Quantizationintegerops / qdq
BiDAF (ONNX Model Zoo)Natural Language ProcessingPost-Training Dynamic Quantizationintegerops
Spanbert SQuAD (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
Bert base multilingual cased SQuAD (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
DistilBert base uncased SQuAD (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
BERT large uncased whole word masking SQuAD (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
Roberta large SQuAD v2 (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
GPT2 WikiText (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
DistilGPT2 WikiText (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
LayoutLMv3 FUNSD (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
LayoutLMv2 FUNSD (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
LayoutLM FUNSD (HuggingFace)Natural Language ProcessingPost-Training Dynamic / Static Quantization - integerops / qlinearops -
SSD MobileNet V1Object DetectionPost-Training Static Quantizationqlinearops / qdq
SSD MobileNet V2Object DetectionPost-Training Static Quantizationqlinearops / qdq
Table Transformer Structure RecognitionObject DetectionPost-Training Static Quantizationqlinearops
Table Transformer DetectionObject DetectionPost-Training Static Quantizationqlinearops
SSD MobileNet V1 (ONNX Model Zoo)Object DetectionPost-Training Static Quantizationqlinearops / qdq
DUC (ONNX Model Zoo)Object DetectionPost-Training Static Quantizationqlinearops
Faster R-CNN (ONNX Model Zoo)Object DetectionPost-Training Static Quantizationqlinearops / qdq
Mask R-CNN (ONNX Model Zoo)Object Detectionlink
GraphSageGraph Networks Post-Training Static Quantizationqlinearops / qdq
SSD (ONNX Model Zoo)link
yolo_v5 Object Detection Post-Training Static Quantizationqlinearops / qdq
Tiny YOLOv3 (ONNX Model Zoo)link
faster_rcnn_resnet50 Object Detection Post-Training Static Quantizationqlinearops
YOLOv3 (ONNX Model Zoo)link
mask_rcnn_inception_v2 Object Detection Post-Training Static Quantizationqlinearops
YOLOv4 (ONNX Model Zoo)link
ssd_mobilenet_v1 Object Detection Post-Training Static Quantizationqlinearops
Emotion FERPlus (ONNX Model Zoo)Body Analysislink
wide_deep_large_dsRecommendation Post-Training Static Quantizationqlinearops
Ultra Face (ONNX Model Zoo)Body Analysislink
3dunet-mlperfSemantic Image Segmentation Post-Training Static Quantizationqlinearops
GPT-J-6B (HuggingFace)Text GenerationPost-Training Dynamic / Static Quantization - integerops / qlinearops -
Llama-7B (HuggingFace)Text GenerationStatic / Weight Only Quantization - qlinearops / weight_only -
link
-# Notebook Examples - -* [Performance of FP32 Vs. INT8 ResNet50 Model (Deprecated)](/examples/notebook/perf_fp32_int8_tf): compare existed FP32 & INT8 ResNet50 model directly. -* [Intel® Neural Compressor Sample for TensorFlow* (Deprecated)](/examples/notebook/tensorflow/alexnet_mnist): an End-To-End pipeline to build up a CNN model by TensorFlow to recognize handwriting number and speed up AI model by Intel® Neural Compressor. -* [Accelerate VGG19 Inference on Intel® Gen4 Xeon® Sapphire Rapids (Deprecated)](/examples/notebook/tensorflow/vgg19_ibean): an End-To-End pipeline to train VGG19 model by transfer learning based on pre-trained model from [TensorFlow Hub](https://tfhub.dev); quantize it by Intel® Neural Compressor on Intel® Gen4 Xeon® Sapphire Rapids. - diff --git a/examples/deprecated/README.md b/examples/deprecated/README.md new file mode 100644 index 00000000000..73ff04da7f3 --- /dev/null +++ b/examples/deprecated/README.md @@ -0,0 +1,1356 @@ +Examples +========== +Intel® Neural Compressor validated examples with multiple compression techniques, including quantization, pruning, knowledge distillation and orchestration. Part of the validated cases can be found in the example tables, and the release data is available [here](../../docs/source/validated_model_list.md). + +> Note: `3.x_api` here is the target example folder for version >= 3.0, which contains pytorch and tensorflow related examples. TensorFlow and ONNX related examples here are no longer maintained. + + +# Quick Get Started Notebook Examples +* [Quick Get Started Notebook of Intel® Neural Compressor for ONNXRuntime (Deprecated)](/examples/deprecated/notebook/onnxruntime/Quick_Started_Notebook_of_INC_for_ONNXRuntime.ipynb) + +* [Quick Get Started Notebook of Intel® Neural Compressor for Tensorflow (Deprecated)](/examples/deprecated/notebook/tensorflow/resnet/resnet_quantization.ipynb) + +# Helloworld Examples +* [torch_llm](/examples/deprecated/helloworld/torch_woq): apply the weight-only quantization to LLMs. +* [torch_non_llm](/examples/deprecated/helloworld/torch_static_quant): apply the static quantization to non-LLMs. +* [tf_example1 (Deprecated)](/examples/deprecated/helloworld/tf_example1): quantize with built-in dataloader and metric. +* [tf_example2 (Deprecated)](/examples/deprecated/helloworld/tf_example2): quantize keras model with customized metric and dataloader. +* [tf_example3 (Deprecated)](/examples/deprecated/helloworld/tf_example3): convert model with mix precision. +* [tf_example4 (Deprecated)](/examples/deprecated/helloworld/tf_example4): quantize checkpoint with dummy dataloader. +* [tf_example5 (Deprecated)](/examples/deprecated/helloworld/tf_example5): config performance and accuracy measurement. +* [tf_example6 (Deprecated)](/examples/deprecated/helloworld/tf_example6): use default user-facing APIs to quantize a pb model. +* [tf_example7 (Deprecated)](/examples/deprecated/helloworld/tf_example7): quantize and benchmark with pure python API. 
+ +# TensorFlow Examples (Deprecated) +## Quantization + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ModelDomainApproachExamples
ResNet50 V1.0Image RecognitionPost-Training Static Quantizationpb
ResNet50 V1.5Image RecognitionPost-Training Static Quantizationpb / keras
ResNet101Image RecognitionPost-Training Static Quantizationpb / keras
MobileNet V1Image RecognitionPost-Training Static Quantizationpb
MobileNet V2Image RecognitionPost-Training Static Quantizationpb / keras
MobileNet V3Image RecognitionPost-Training Static Quantizationpb
Inception V1Image RecognitionPost-Training Static Quantizationpb
Inception V2Image RecognitionPost-Training Static Quantizationpb
Inception V3Image RecognitionPost-Training Static Quantizationpb / keras
Inception V4Image RecognitionPost-Training Static Quantizationpb
Inception ResNet V2Image RecognitionPost-Training Static Quantizationpb / keras
VGG16Image RecognitionPost-Training Static Quantizationpb / keras
VGG19Image RecognitionPost-Training Static Quantizationpb / keras
ResNet V2 50Image RecognitionPost-Training Static Quantizationpb / keras
ResNet V2 101Image RecognitionPost-Training Static Quantizationpb / keras
ResNet V2 152Image RecognitionPost-Training Static Quantizationpb
DenseNet121Image RecognitionPost-Training Static Quantizationpb
DenseNet161Image RecognitionPost-Training Static Quantizationpb
DenseNet169Image RecognitionPost-Training Static Quantizationpb
EfficientNet B0Image RecognitionPost-Training Static Quantizationckpt
XceptionImage RecognitionPost-Training Static Quantizationkeras
ResNet V2Image RecognitionQuantization-Aware Trainingkeras
EfficientNet V2 B0Image RecognitionPost-Training Static QuantizationSavedModel
BERT base MRPCNatural Language ProcessingPost-Training Static Quantizationckpt
BERT large SQuAD (Model Zoo)Natural Language ProcessingPost-Training Static Quantizationpb
BERT large SQuADNatural Language ProcessingPost-Training Static Quantizationpb
DistilBERT baseNatural Language ProcessingPost-Training Static Quantizationpb
Transformer LTNatural Language ProcessingPost-Training Static Quantizationpb
Transformer LT MLPerfNatural Language ProcessingPost-Training Static Quantizationpb
SSD ResNet50 V1Object DetectionPost-Training Static Quantizationpb / ckpt
SSD MobileNet V1Object DetectionPost-Training Static Quantizationpb / ckpt
Faster R-CNN Inception ResNet V2Object DetectionPost-Training Static Quantizationpb / SavedModel
Faster R-CNN ResNet101Object DetectionPost-Training Static Quantizationpb / SavedModel
Faster R-CNN ResNet50Object DetectionPost-Training Static Quantizationpb
Mask R-CNN Inception V2Object DetectionPost-Training Static Quantizationpb / ckpt
SSD ResNet34Object DetectionPost-Training Static Quantizationpb
YOLOv3Object DetectionPost-Training Static Quantizationpb
Wide & DeepRecommendationPost-Training Static Quantizationpb
Arbitrary Style TransferStyle TransferPost-Training Static Quantizationckpt
OPTNatural Language ProcessingPost-Training Static Quantizationpb (smooth quant)
GPT2Natural Language ProcessingPost-Training Static Quantizationpb (smooth quant)
ViTImage RecognitionPost-Training Static Quantizationpb
GraphSageGraph NetworksPost-Training Static Quantizationpb
EleutherAI/gpt-j-6BNatural Language ProcessingPost-Training Static Quantizationsaved_model (smooth quant)
+ +## Distillation + + + + + + + + + + + + + + + + + + + +
Student ModelTeacher ModelDomainApproachExamples
MobileNetDenseNet201Image RecognitionKnowledge Distillationpb
+ +## Pruning + + + + + + + + + + + + + + + + + + + + + + + +
ModelDomainApproachExamples
ResNet V2Image RecognitionStructured (4x1, 2in4)keras
ViTImage RecognitionStructured (4x1, 2in4)keras
+ +## Model Export + + + + + + + + + + + + + + + + + +
ModelDomainApproachExamples
ResNet50 V1.5Image RecognitionTF2ONNXint8 fp32
# PyTorch Examples
## Quantization

| Model | Domain | Approach | Examples |
|---|---|---|---|
| ResNet18 | Image Recognition | Post-Training Static Quantization | fx / ipex |
| ResNet18 | Image Recognition | Quantization-Aware Training | fx |
| ResNet50 | Image Recognition | Post-Training Static Quantization | fx / ipex |
| ResNet50 | Image Recognition | Quantization-Aware Training | fx |
| ResNeXt101_32x16d_wsl | Image Recognition | Post-Training Static Quantization | ipex |
| ResNeXt101_32x8d | Image Recognition | Post-Training Static Quantization | fx |
| Se_ResNeXt50_32x4d | Image Recognition | Post-Training Static Quantization | fx |
| Inception V3 | Image Recognition | Post-Training Static Quantization | fx |
| MobileNet V2 | Image Recognition | Post-Training Static Quantization | fx |
| PeleeNet | Image Recognition | Post-Training Static Quantization | fx |
| ResNeSt50 | Image Recognition | Post-Training Static Quantization | fx |
| 3D-UNet | Image Recognition | Post-Training Static Quantization | fx |
| SSD ResNet34 | Object Detection | Post-Training Static Quantization | fx / ipex |
| YOLOv3 | Object Detection | Post-Training Static Quantization | fx |
| Mask R-CNN | Object Detection | Post-Training Static Quantization | fx |
| DLRM | Recommendation | Post-Training Static Quantization | ipex / fx |
| HuBERT | Speech Recognition | Post-Training Static Quantization | fx |
| HuBERT | Speech Recognition | Post-Training Dynamic Quantization | fx |
| BlendCNN | Natural Language Processing | Post-Training Static Quantization | ipex |
| bert-large-uncased-whole-word-masking-finetuned-squad | Natural Language Processing | Post-Training Static Quantization | fx / ipex (Intel GPU) |
| distilbert-base-uncased-distilled-squad | Natural Language Processing | Post-Training Static Quantization | ipex |
| yoshitomo-matsubara/bert-large-uncased-rte | Natural Language Processing | Post-Training Dynamic Quantization | fx |
| Intel/xlm-roberta-base-mrpc | Natural Language Processing | Post-Training Dynamic Quantization | fx |
| textattack/distilbert-base-uncased-MRPC | Natural Language Processing | Post-Training Dynamic Quantization | fx |
| textattack/albert-base-v2-MRPC | Natural Language Processing | Post-Training Dynamic Quantization | fx |
| Intel/xlm-roberta-base-mrpc | Natural Language Processing | Post-Training Static Quantization | fx |
| yoshitomo-matsubara/bert-large-uncased-rte | Natural Language Processing | Post-Training Static Quantization | fx |
| Intel/bert-base-uncased-mrpc | Natural Language Processing | Post-Training Static Quantization | fx |
| textattack/bert-base-uncased-CoLA | Natural Language Processing | Post-Training Static Quantization | fx |
| textattack/bert-base-uncased-STS-B | Natural Language Processing | Post-Training Static Quantization | fx |
| gchhablani/bert-base-cased-finetuned-sst2 | Natural Language Processing | Post-Training Static Quantization | fx |
| ModelTC/bert-base-uncased-rte | Natural Language Processing | Post-Training Static Quantization | fx |
| textattack/bert-base-uncased-QNLI | Natural Language Processing | Post-Training Static Quantization | fx |
| yoshitomo-matsubara/bert-large-uncased-cola | Natural Language Processing | Post-Training Static Quantization | fx |
| textattack/distilbert-base-uncased-MRPC | Natural Language Processing | Post-Training Static Quantization | fx |
| Intel/xlnet-base-cased-mrpc | Natural Language Processing | Post-Training Static Quantization | fx |
| textattack/roberta-base-MRPC | Natural Language Processing | Post-Training Static Quantization | fx |
| Intel/camembert-base-mrpc | Natural Language Processing | Post-Training Static Quantization | fx |
| t5-small | Natural Language Processing | Post-Training Dynamic Quantization | fx |
| Helsinki-NLP/opus-mt-en-ro | Natural Language Processing | Post-Training Dynamic Quantization | fx |
| lvwerra/pegasus-samsum | Natural Language Processing | Post-Training Dynamic Quantization | fx |
| google/reformer-crime-and-punishment | Natural Language Processing | Post-Training Static Quantization | fx |
| EleutherAI/gpt-j-6B | Natural Language Processing | Post-Training Static Quantization | fx / smooth quant |
| EleutherAI/gpt-j-6B | Natural Language Processing | Post-Training Weight Only Quantization | weight_only |
| abeja/gpt-neox-japanese-2.7b | Natural Language Processing | Post-Training Static Quantization | fx |
| bigscience/bloom | Natural Language Processing | Post-Training Static Quantization | smooth quant |
| facebook/opt | Natural Language Processing | Post-Training Static Quantization | smooth quant |
| SD Diffusion | Text to Image | Post-Training Static Quantization | fx |
| openai/whisper-large | Speech Recognition | Post-Training Dynamic Quantization | fx |
| torchaudio/wav2vec2 | Speech Recognition | Post-Training Dynamic Quantization | fx |
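Most of the fx entries above share one post-training static quantization recipe. A minimal sketch of that recipe with the deprecated 2.x API is shown below, assuming a torchvision ResNet18 and a dummy calibration dataloader; it is an illustration, not the exact script used by any listed example.

```python
import torchvision.models as models

from neural_compressor import PostTrainingQuantConfig, quantization
from neural_compressor.data import DataLoader, Datasets

# Any eval-mode PyTorch model can be passed in; ResNet18 is used here for illustration.
model = models.resnet18(weights=None).eval()

# Dummy images used only to drive calibration.
dataset = Datasets("pytorch")["dummy"](shape=(16, 3, 224, 224))
calib_dataloader = DataLoader(framework="pytorch", dataset=dataset)

# Default config: post-training static quantization on the default PyTorch backend.
conf = PostTrainingQuantConfig()
q_model = quantization.fit(model, conf, calib_dataloader=calib_dataloader)
q_model.save("./saved_results")
```

The ipex and smooth-quant variants differ mainly in the config (backend and recipes), while the weight-only GPT-J example uses a separate weight-only approach.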
## Quantization with [Intel® Extension for Transformers](https://github.com/intel/intel-extension-for-transformers) based on Intel® Neural Compressor

| Model | Domain | Approach | Examples |
|---|---|---|---|
| T5 Large | Natural Language Processing | Post-Training Dynamic Quantization | fx |
| Flan T5 Large | Natural Language Processing | Post-Training Dynamic / Static Quantization | fx |
## Pruning

| Model | Domain | Pruning Type | Approach | Examples |
|---|---|---|---|---|
| Distilbert-base-uncased | Natural Language Processing (text classification) | Structured (4x1, 2in4), Unstructured | Snip-momentum | eager |
| Bert-mini | Natural Language Processing (text classification) | Structured (4x1, 2in4, per channel), Unstructured | Snip-momentum | eager |
| Distilbert-base-uncased | Natural Language Processing (question answering) | Structured (4x1, 2in4), Unstructured | Snip-momentum | eager |
| Bert-mini | Natural Language Processing (question answering) | Structured (4x1, 2in4), Unstructured | Snip-momentum | eager |
| Bert-base-uncased | Natural Language Processing (question answering) | Structured (4x1, 2in4), Unstructured | Snip-momentum | eager |
| Bert-large | Natural Language Processing (question answering) | Structured (4x1, 2in4), Unstructured | Snip-momentum | eager |
| Flan-T5-small | Natural Language Processing (translation) | Structured (4x1) | Snip-momentum | eager |
| YOLOv5s6 | Object Detection | Structured (4x1, 2in4), Unstructured | Snip-momentum | eager |
| ResNet50 | Image Recognition | Structured (2x1) | Snip-momentum | eager |
| Bert-base | Question Answering | Structured (channel, multi-head attention) | Snip-momentum | eager |
| Bert-large | Question Answering | Structured (channel, multi-head attention) | Snip-momentum | eager |
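All of the pruning entries above drive the training-time Snip-momentum pruner through compression callbacks. The sketch below shows the rough shape of that loop with the deprecated 2.x API; the tiny model, data, sparsity target, pattern, and step range are illustrative placeholders rather than settings taken from any listed example.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

from neural_compressor.training import WeightPruningConfig, prepare_compression

# Tiny stand-in model and data; the real examples prune BERT, YOLOv5, or ResNet training scripts.
model = torch.nn.Sequential(torch.nn.Linear(128, 64), torch.nn.ReLU(), torch.nn.Linear(64, 2))
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
train_dataloader = DataLoader(
    TensorDataset(torch.randn(64, 128), torch.randint(0, 2, (64,))), batch_size=16
)

# Illustrative config: 4x1 structured Snip-momentum pruning to 90% sparsity (values are placeholders).
config = WeightPruningConfig(
    [{"pruning_type": "snip_momentum", "pattern": "4x1", "target_sparsity": 0.9,
      "start_step": 0, "end_step": 20}]
)
compression_manager = prepare_compression(model, config)
compression_manager.callbacks.on_train_begin()

for epoch in range(2):
    for step, (inputs, labels) in enumerate(train_dataloader):
        compression_manager.callbacks.on_step_begin(step)
        loss = criterion(model(inputs), labels)
        loss.backward()
        compression_manager.callbacks.on_before_optimizer_step()
        optimizer.step()
        compression_manager.callbacks.on_after_optimizer_step()
        optimizer.zero_grad()
        compression_manager.callbacks.on_step_end()

compression_manager.callbacks.on_train_end()
```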
## Distillation

| Student Model | Teacher Model | Domain | Approach | Examples |
|---|---|---|---|---|
| CNN-2 | CNN-10 | Image Recognition | Knowledge Distillation | eager |
| MobileNet V2-0.35 | WideResNet40-2 | Image Recognition | Knowledge Distillation | eager |
| ResNet18\|ResNet34\|ResNet50\|ResNet101 | ResNet18\|ResNet34\|ResNet50\|ResNet101 | Image Recognition | Knowledge Distillation | eager |
| ResNet18\|ResNet34\|ResNet50\|ResNet101 | ResNet18\|ResNet34\|ResNet50\|ResNet101 | Image Recognition | Self Distillation | eager |
| VGG-8 | VGG-13 | Image Recognition | Knowledge Distillation | eager |
| BlendCNN | BERT-Base | Natural Language Processing | Knowledge Distillation | eager |
| DistilBERT | BERT-Base | Natural Language Processing | Knowledge Distillation | eager |
| BiLSTM | RoBERTa-Base | Natural Language Processing | Knowledge Distillation | eager |
| TinyBERT | BERT-Base | Natural Language Processing | Knowledge Distillation | eager |
| BERT-3 | BERT-Base | Natural Language Processing | Knowledge Distillation | eager |
| DistilRoBERTa | RoBERTa-Large | Natural Language Processing | Knowledge Distillation | eager |
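The distillation examples above all wrap an ordinary eager training loop with a distillation criterion. A minimal sketch of that pattern with the deprecated 2.x API follows, assuming a toy teacher/student pair and random data; loss weights and temperature are illustrative, not tuned values from any listed example.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig
from neural_compressor.training import prepare_compression

# Toy teacher/student pair; the listed examples distill real CNN and BERT models.
teacher = torch.nn.Sequential(torch.nn.Linear(128, 64), torch.nn.ReLU(), torch.nn.Linear(64, 10))
student = torch.nn.Sequential(torch.nn.Linear(128, 10))
optimizer = torch.optim.SGD(student.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
dataloader = DataLoader(
    TensorDataset(torch.randn(64, 128), torch.randint(0, 10, (64,))), batch_size=16
)

# Blend hard-label cross entropy with a KL term against the teacher logits (illustrative weights).
loss_conf = KnowledgeDistillationLossConfig(temperature=1.0, loss_types=["CE", "KL"], loss_weights=[0.5, 0.5])
conf = DistillationConfig(teacher_model=teacher, criterion=loss_conf)
compression_manager = prepare_compression(student, conf)
model = compression_manager.model

compression_manager.callbacks.on_train_begin()
for epoch in range(2):
    for inputs, labels in dataloader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # The callbacks combine the task loss with the distillation loss.
        loss = compression_manager.callbacks.on_after_compute_loss(inputs, outputs, loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
compression_manager.callbacks.on_train_end()
```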
## Orchestration

| Model | Domain | Approach | Examples |
|---|---|---|---|
| ResNet50 | Image Recognition | Multi-shot: Pruning and PTQ | link |
| ResNet50 | Image Recognition | One-shot: QAT during Pruning | link |
| Intel/bert-base-uncased-sparse-90-unstructured-pruneofa | Natural Language Processing (question-answering) | One-shot: Pruning, Distillation and QAT | link |
| Intel/bert-base-uncased-sparse-90-unstructured-pruneofa | Natural Language Processing (text-classification) | One-shot: Pruning, Distillation and QAT | link |
## Model Export

| Model | Domain | Approach | Examples |
|---|---|---|---|
| ResNet18 | Image Recognition | PT2ONNX | int8 fp32 |
| ResNet50 | Image Recognition | PT2ONNX | int8 fp32 |
| bert base MRPC | Natural Language Processing | PT2ONNX | int8 fp32 |
| bert large MRPC | Natural Language Processing | PT2ONNX | int8 fp32 |
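The PT2ONNX rows export both FP32 and INT8 variants of a quantized PyTorch model to ONNX. The sketch below shows the rough shape of the INT8 path with the deprecated 2.x export config; it assumes `q_model` is the object returned by a `quantization.fit` call such as the one sketched after the quantization table, and the file name, opset version, and shapes are placeholders.

```python
import torch

from neural_compressor.config import Torch2ONNXConfig

# q_model is assumed to be the INT8 model returned by quantization.fit (see the earlier sketch).
int8_onnx_config = Torch2ONNXConfig(
    dtype="int8",
    opset_version=14,
    quant_format="QDQ",  # "QLinear" would target the qlinearops representation instead
    example_inputs=torch.randn(1, 3, 224, 224),
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
)
q_model.export("resnet18-int8.onnx", int8_onnx_config)
```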
# ONNX Runtime Examples (Deprecated)
## Quantization

| Model | Domain | Approach | Examples |
|---|---|---|---|
| ResNet50 V1.5 | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| ResNet50 V1.5 MLPerf | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| VGG16 | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| MobileNet V2 | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| MobileNet V3 MLPerf | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| AlexNet (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| CaffeNet (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| DenseNet (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops |
| EfficientNet (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| FCN (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| GoogleNet (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| Inception V1 (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| MNIST (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops |
| MobileNet V2 (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| ResNet50 V1.5 (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| ShuffleNet V2 (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| SqueezeNet (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| VGG16 (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| ZFNet (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops / qdq |
| ArcFace (ONNX Model Zoo) | Image Recognition | Post-Training Static Quantization | qlinearops |
| BEiT | Image Recognition | Post-Training Static Quantization | qlinearops |
| BERT base MRPC | Natural Language Processing | Post-Training Static Quantization | integerops / qdq |
| BERT base MRPC | Natural Language Processing | Post-Training Dynamic Quantization | integerops |
| DistilBERT base MRPC | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qdq |
| Mobile bert MRPC | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qdq |
| Roberta base MRPC | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qdq |
| BERT SQuAD (ONNX Model Zoo) | Natural Language Processing | Post-Training Dynamic Quantization | integerops |
| GPT2 lm head WikiText (ONNX Model Zoo) | Natural Language Processing | Post-Training Dynamic Quantization | integerops |
| MobileBERT SQuAD MLPerf (ONNX Model Zoo) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qdq |
| BiDAF (ONNX Model Zoo) | Natural Language Processing | Post-Training Dynamic Quantization | integerops |
| Spanbert SQuAD (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| Bert base multilingual cased SQuAD (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| DistilBert base uncased SQuAD (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| BERT large uncased whole word masking SQuAD (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| Roberta large SQuAD v2 (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| GPT2 WikiText (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| DistilGPT2 WikiText (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| LayoutLMv3 FUNSD (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| LayoutLMv2 FUNSD (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| LayoutLM FUNSD (HuggingFace) | Natural Language Processing | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| SSD MobileNet V1 | Object Detection | Post-Training Static Quantization | qlinearops / qdq |
| SSD MobileNet V2 | Object Detection | Post-Training Static Quantization | qlinearops / qdq |
| Table Transformer Structure Recognition | Object Detection | Post-Training Static Quantization | qlinearops |
| Table Transformer Detection | Object Detection | Post-Training Static Quantization | qlinearops |
| SSD MobileNet V1 (ONNX Model Zoo) | Object Detection | Post-Training Static Quantization | qlinearops / qdq |
| DUC (ONNX Model Zoo) | Object Detection | Post-Training Static Quantization | qlinearops |
| Faster R-CNN (ONNX Model Zoo) | Object Detection | Post-Training Static Quantization | qlinearops / qdq |
| Mask R-CNN (ONNX Model Zoo) | Object Detection | Post-Training Static Quantization | qlinearops / qdq |
| SSD (ONNX Model Zoo) | Object Detection | Post-Training Static Quantization | qlinearops / qdq |
| Tiny YOLOv3 (ONNX Model Zoo) | Object Detection | Post-Training Static Quantization | qlinearops |
| YOLOv3 (ONNX Model Zoo) | Object Detection | Post-Training Static Quantization | qlinearops |
| YOLOv4 (ONNX Model Zoo) | Object Detection | Post-Training Static Quantization | qlinearops |
| Emotion FERPlus (ONNX Model Zoo) | Body Analysis | Post-Training Static Quantization | qlinearops |
| Ultra Face (ONNX Model Zoo) | Body Analysis | Post-Training Static Quantization | qlinearops |
| GPT-J-6B (HuggingFace) | Text Generation | Post-Training Dynamic / Static Quantization | integerops / qlinearops |
| Llama-7B (HuggingFace) | Text Generation | Static / Weight Only Quantization | qlinearops / weight_only |
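As with the other frameworks, these deprecated ONNX Runtime examples go through the 2.x `quantization.fit` entry point; the `quant_format` option selects between the QOperator (qlinearops) and QDQ representations listed above. A minimal sketch follows, with a hypothetical model path and dummy calibration data rather than the dataloaders the real examples use.

```python
from neural_compressor import PostTrainingQuantConfig, quantization
from neural_compressor.data import DataLoader, Datasets

# Dummy calibration inputs; real examples use ImageNet- or SQuAD-style dataloaders.
dataset = Datasets("onnxrt_qlinearops")["dummy"](shape=(1, 3, 224, 224))
calib_dataloader = DataLoader(framework="onnxruntime", dataset=dataset)

# quant_format="QOperator" produces the qlinearops variant; "QDQ" produces the qdq variant.
conf = PostTrainingQuantConfig(approach="static", quant_format="QOperator")

# "resnet50-fp32.onnx" is a placeholder path for an FP32 ONNX model.
q_model = quantization.fit("resnet50-fp32.onnx", conf, calib_dataloader=calib_dataloader)
q_model.save("resnet50-int8.onnx")
```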
# Notebook Examples

* [Performance of FP32 Vs. INT8 ResNet50 Model (Deprecated)](/examples/deprecated/notebook/perf_fp32_int8_tf): compare existing FP32 and INT8 ResNet50 models directly.
* [Intel® Neural Compressor Sample for TensorFlow* (Deprecated)](/examples/deprecated/notebook/tensorflow/alexnet_mnist): an end-to-end pipeline that builds a CNN model with TensorFlow to recognize handwritten digits and speeds it up with Intel® Neural Compressor.
* [Accelerate VGG19 Inference on Intel® Gen4 Xeon® Sapphire Rapids (Deprecated)](/examples/deprecated/notebook/tensorflow/vgg19_ibean): an end-to-end pipeline that trains a VGG19 model by transfer learning from a pre-trained model on [TensorFlow Hub](https://tfhub.dev), then quantizes it with Intel® Neural Compressor on Intel® Gen4 Xeon® Sapphire Rapids.

diff --git a/examples/deprecated/helloworld/README.md b/examples/deprecated/helloworld/README.md
new file mode 100644
index 00000000000..791b080e548
--- /dev/null
+++ b/examples/deprecated/helloworld/README.md
@@ -0,0 +1,26 @@
+# Hello World Examples
+
+## PyTorch Examples
+### Examples List
+* [torch_llm](/examples/deprecated/helloworld/torch_woq): apply weight-only quantization to LLMs.
+* [torch_non_llm](/examples/deprecated/helloworld/torch_static_quant): apply static quantization to non-LLMs.
+
+## TensorFlow Examples (Deprecated)
+### Prerequisite
+Enter the following commands to prepare a dataset and pretrained models for the included Hello World examples:
+
+```shell
+pip install intel-tensorflow==2.10.0
+python train.py
+```
+The `train.py` script generates a saved model and a frozen pb at ./models for your use.
+> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment).
+
+### Examples List
+* [tf_example1](/examples/deprecated/helloworld/tf_example1): quantize with a built-in dataloader and metric.
+* [tf_example2](/examples/deprecated/helloworld/tf_example2): quantize a Keras model with a customized metric and dataloader.
+* [tf_example3](/examples/deprecated/helloworld/tf_example3): convert a model with mixed precision.
+* [tf_example4](/examples/deprecated/helloworld/tf_example4): quantize a checkpoint with a dummy dataloader.
+* [tf_example5](/examples/deprecated/helloworld/tf_example5): configure performance and accuracy measurement.
+* [tf_example6](/examples/deprecated/helloworld/tf_example6): use the default user-facing APIs to quantize a pb model.
+* [tf_example7](/examples/deprecated/helloworld/tf_example7): quantize a model with a dummy dataset.
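For tf_example5-style performance measurement, the deprecated 2.x benchmark entry point can be pointed at the frozen pb that `train.py` produces. The sketch below is only a smoke test of that flow: the model path, input shape, and instance settings are assumptions, not values taken from the example itself.

```python
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
from neural_compressor.data import DataLoader, Datasets

# Dummy inputs shaped like 28x28 grayscale images (an assumption about the Hello World model).
dataset = Datasets("tensorflow")["dummy"](shape=(1, 28, 28, 1))
b_dataloader = DataLoader(framework="tensorflow", dataset=dataset)

# Illustrative benchmark settings: short warmup, 100 timed iterations, one 4-core instance.
conf = BenchmarkConfig(warmup=10, iteration=100, cores_per_instance=4, num_of_instance=1)

# "./models/frozen.pb" is assumed to be the frozen graph written by train.py.
fit(model="./models/frozen.pb", config=conf, b_dataloader=b_dataloader)
```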
diff --git a/examples/helloworld/fp8_example/README.md b/examples/deprecated/helloworld/fp8_example/README.md similarity index 86% rename from examples/helloworld/fp8_example/README.md rename to examples/deprecated/helloworld/fp8_example/README.md index b758768ef0f..95943b8ee52 100644 --- a/examples/helloworld/fp8_example/README.md +++ b/examples/deprecated/helloworld/fp8_example/README.md @@ -1,96 +1,96 @@ -### Usage demo: - -#### two steps to get quantized model - -```diff -import torch -+ from neural_compressor.torch.quantization import FP8Config, convert, prepare, finalize_calibration -import habana_frameworks.torch.core as htcore - -class M(torch.nn.Module): - def __init__(self) -> None: - super().__init__() - self.fc1 = torch.nn.Linear(10, 5) - self.fc2 = torch.nn.Linear(5, 10) - - def forward(self, inp): - x1 = self.fc1(inp) - x2 = self.fc2(x1) - return x2 - -model = M().eval() - -+ config = FP8Config.from_json_file(args.quant_config) # args.quant_config is the path of json file - -+ if config.measure: -+ model = prepare(model, config) - -+ if config.quantize: -+ htcore.hpu_initialize() -+ model = convert(model, config) - -# user code run -with torch.no_grad(): - model.to("hpu") - output = model(torch.randn(1, 10).to("hpu")) - print(output) - -+ if config.measure: -+ finalize_calibration(model) -``` - - -Whole script and config refer to [sample_two_steps.py](./sample_two_steps.py), [maxabs_measure.json](./maxabs_measure.json) and [maxabs_quant.json](./maxabs_quant.json). - -First, measure the tensor quantization statistic: -```shell -python sample_two_steps.py --quant_config=maxabs_measure.json -``` - -Then quantize the model based on previous measurements: -```shell -python sample_two_steps.py --quant_config=maxabs_quant.json -``` - -#### one step to get quantized model - -```diff -import torch -+ from neural_compressor.torch.quantization import FP8Config, convert, prepare, finalize_calibration -import habana_frameworks.torch.core as htcore - -class M(torch.nn.Module): - def __init__(self) -> None: - super().__init__() - self.fc1 = torch.nn.Linear(10, 5) - self.fc2 = torch.nn.Linear(5, 10) - - def forward(self, inp): - x1 = self.fc1(inp) - x2 = self.fc2(x1) - return x2 - -model = M().to("hpu") - -+ config = FP8Config.from_json_file(args.quant_config) # args.quant_config is the path of json file -+ model = prepare(model, config) - -# user code run to do calibration -with torch.no_grad(): - output = model(torch.randn(1, 10).to("hpu")) - print(output) - -+ finalize_calibration(model) -+ model = convert(model) - -# user code to run benchmark for quantized model -with torch.no_grad(): - output = model(torch.randn(1, 10).to("hpu")) - print(output) -``` - -Whole script and config refer to [sample_one_step.py](./sample_one_step.py). 
- -```shell -python sample_one_step.py --quant_config=quant_config.json -``` +### Usage demo: + +#### two steps to get quantized model + +```diff +import torch ++ from neural_compressor.torch.quantization import FP8Config, convert, prepare, finalize_calibration +import habana_frameworks.torch.core as htcore + +class M(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.fc1 = torch.nn.Linear(10, 5) + self.fc2 = torch.nn.Linear(5, 10) + + def forward(self, inp): + x1 = self.fc1(inp) + x2 = self.fc2(x1) + return x2 + +model = M().eval() + ++ config = FP8Config.from_json_file(args.quant_config) # args.quant_config is the path of json file + ++ if config.measure: ++ model = prepare(model, config) + ++ if config.quantize: ++ htcore.hpu_initialize() ++ model = convert(model, config) + +# user code run +with torch.no_grad(): + model.to("hpu") + output = model(torch.randn(1, 10).to("hpu")) + print(output) + ++ if config.measure: ++ finalize_calibration(model) +``` + + +Whole script and config refer to [sample_two_steps.py](sample_two_steps.py), [maxabs_measure.json](maxabs_measure.json) and [maxabs_quant.json](maxabs_quant.json). + +First, measure the tensor quantization statistic: +```shell +python sample_two_steps.py --quant_config=maxabs_measure.json +``` + +Then quantize the model based on previous measurements: +```shell +python sample_two_steps.py --quant_config=maxabs_quant.json +``` + +#### one step to get quantized model + +```diff +import torch ++ from neural_compressor.torch.quantization import FP8Config, convert, prepare, finalize_calibration +import habana_frameworks.torch.core as htcore + +class M(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.fc1 = torch.nn.Linear(10, 5) + self.fc2 = torch.nn.Linear(5, 10) + + def forward(self, inp): + x1 = self.fc1(inp) + x2 = self.fc2(x1) + return x2 + +model = M().to("hpu") + ++ config = FP8Config.from_json_file(args.quant_config) # args.quant_config is the path of json file ++ model = prepare(model, config) + +# user code run to do calibration +with torch.no_grad(): + output = model(torch.randn(1, 10).to("hpu")) + print(output) + ++ finalize_calibration(model) ++ model = convert(model) + +# user code to run benchmark for quantized model +with torch.no_grad(): + output = model(torch.randn(1, 10).to("hpu")) + print(output) +``` + +Whole script and config refer to [sample_one_step.py](sample_one_step.py). 
+ +```shell +python sample_one_step.py --quant_config=quant_config.json +``` diff --git a/examples/helloworld/fp8_example/maxabs_measure.json b/examples/deprecated/helloworld/fp8_example/maxabs_measure.json similarity index 100% rename from examples/helloworld/fp8_example/maxabs_measure.json rename to examples/deprecated/helloworld/fp8_example/maxabs_measure.json diff --git a/examples/helloworld/fp8_example/maxabs_quant.json b/examples/deprecated/helloworld/fp8_example/maxabs_quant.json similarity index 100% rename from examples/helloworld/fp8_example/maxabs_quant.json rename to examples/deprecated/helloworld/fp8_example/maxabs_quant.json diff --git a/examples/helloworld/fp8_example/quant_config.json b/examples/deprecated/helloworld/fp8_example/quant_config.json similarity index 100% rename from examples/helloworld/fp8_example/quant_config.json rename to examples/deprecated/helloworld/fp8_example/quant_config.json diff --git a/examples/helloworld/fp8_example/sample_one_step.py b/examples/deprecated/helloworld/fp8_example/sample_one_step.py similarity index 100% rename from examples/helloworld/fp8_example/sample_one_step.py rename to examples/deprecated/helloworld/fp8_example/sample_one_step.py diff --git a/examples/helloworld/fp8_example/sample_two_steps.py b/examples/deprecated/helloworld/fp8_example/sample_two_steps.py similarity index 96% rename from examples/helloworld/fp8_example/sample_two_steps.py rename to examples/deprecated/helloworld/fp8_example/sample_two_steps.py index 9e17748b9b0..231f566d3bb 100644 --- a/examples/helloworld/fp8_example/sample_two_steps.py +++ b/examples/deprecated/helloworld/fp8_example/sample_two_steps.py @@ -1,50 +1,50 @@ -import argparse -import torch -import habana_frameworks.torch.core as htcore -htcore.hpu_set_env() - -from neural_compressor.torch.quantization import FP8Config, convert, finalize_calibration, prepare - -torch.manual_seed(1) - -# 1. python sample_two_steps.py --quant_config=maxabs_measure.json -# 2. python sample_two_steps.py --quant_config=maxabs_quant.json - - -class M(torch.nn.Module): - def __init__(self) -> None: - super().__init__() - self.fc1 = torch.nn.Linear(10, 5) - self.fc2 = torch.nn.Linear(5, 10) - - def forward(self, inp): - x1 = self.fc1(inp) - x2 = self.fc2(x1) - return x2 - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description="Habana FP8 sample code.", formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument("--quant_config", type=str, help="json file of quantization config") - args = parser.parse_args() - - model = M().eval() - config = FP8Config.from_json_file(args.quant_config) - - if config.measure: - model = prepare(model, config) - - if config.quantize: - htcore.hpu_initialize() - model = convert(model, config) - print(model) - - with torch.no_grad(): - model.to("hpu") - output = model(torch.randn(1, 10).to("hpu")) - print(output) - - if config.measure: - finalize_calibration(model) +import argparse +import torch +import habana_frameworks.torch.core as htcore +htcore.hpu_set_env() + +from neural_compressor.torch.quantization import FP8Config, convert, finalize_calibration, prepare + +torch.manual_seed(1) + +# 1. python sample_two_steps.py --quant_config=maxabs_measure.json +# 2. 
python sample_two_steps.py --quant_config=maxabs_quant.json + + +class M(torch.nn.Module): + def __init__(self) -> None: + super().__init__() + self.fc1 = torch.nn.Linear(10, 5) + self.fc2 = torch.nn.Linear(5, 10) + + def forward(self, inp): + x1 = self.fc1(inp) + x2 = self.fc2(x1) + return x2 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Habana FP8 sample code.", formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument("--quant_config", type=str, help="json file of quantization config") + args = parser.parse_args() + + model = M().eval() + config = FP8Config.from_json_file(args.quant_config) + + if config.measure: + model = prepare(model, config) + + if config.quantize: + htcore.hpu_initialize() + model = convert(model, config) + print(model) + + with torch.no_grad(): + model.to("hpu") + output = model(torch.randn(1, 10).to("hpu")) + print(output) + + if config.measure: + finalize_calibration(model) diff --git a/examples/helloworld/tf_example1/README.md b/examples/deprecated/helloworld/tf_example1/README.md similarity index 92% rename from examples/helloworld/tf_example1/README.md rename to examples/deprecated/helloworld/tf_example1/README.md index e97b5413455..c7c419e32cf 100644 --- a/examples/helloworld/tf_example1/README.md +++ b/examples/deprecated/helloworld/tf_example1/README.md @@ -14,7 +14,7 @@ pip install -r requirements.txt ### 2. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. -We also prepared related scripts in [TF image_recognition example](/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). +We also prepared related scripts in [TF image_recognition example](/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). ### 3. 
Download the FP32 model ```shell diff --git a/examples/helloworld/tf_example1/requirements.txt b/examples/deprecated/helloworld/tf_example1/requirements.txt similarity index 100% rename from examples/helloworld/tf_example1/requirements.txt rename to examples/deprecated/helloworld/tf_example1/requirements.txt diff --git a/examples/helloworld/tf_example1/test.py b/examples/deprecated/helloworld/tf_example1/test.py similarity index 100% rename from examples/helloworld/tf_example1/test.py rename to examples/deprecated/helloworld/tf_example1/test.py diff --git a/examples/helloworld/tf_example2/README.md b/examples/deprecated/helloworld/tf_example2/README.md similarity index 100% rename from examples/helloworld/tf_example2/README.md rename to examples/deprecated/helloworld/tf_example2/README.md diff --git a/examples/helloworld/tf_example2/requirements.txt b/examples/deprecated/helloworld/tf_example2/requirements.txt similarity index 100% rename from examples/helloworld/tf_example2/requirements.txt rename to examples/deprecated/helloworld/tf_example2/requirements.txt diff --git a/examples/helloworld/tf_example2/test.py b/examples/deprecated/helloworld/tf_example2/test.py similarity index 100% rename from examples/helloworld/tf_example2/test.py rename to examples/deprecated/helloworld/tf_example2/test.py diff --git a/examples/helloworld/tf_example3/README.md b/examples/deprecated/helloworld/tf_example3/README.md similarity index 100% rename from examples/helloworld/tf_example3/README.md rename to examples/deprecated/helloworld/tf_example3/README.md diff --git a/examples/helloworld/tf_example3/requirements.txt b/examples/deprecated/helloworld/tf_example3/requirements.txt similarity index 100% rename from examples/helloworld/tf_example3/requirements.txt rename to examples/deprecated/helloworld/tf_example3/requirements.txt diff --git a/examples/helloworld/tf_example3/test.py b/examples/deprecated/helloworld/tf_example3/test.py similarity index 100% rename from examples/helloworld/tf_example3/test.py rename to examples/deprecated/helloworld/tf_example3/test.py diff --git a/examples/helloworld/tf_example4/README.md b/examples/deprecated/helloworld/tf_example4/README.md similarity index 100% rename from examples/helloworld/tf_example4/README.md rename to examples/deprecated/helloworld/tf_example4/README.md diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/requirements.txt b/examples/deprecated/helloworld/tf_example4/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/requirements.txt rename to examples/deprecated/helloworld/tf_example4/requirements.txt diff --git a/examples/helloworld/tf_example4/test.py b/examples/deprecated/helloworld/tf_example4/test.py similarity index 100% rename from examples/helloworld/tf_example4/test.py rename to examples/deprecated/helloworld/tf_example4/test.py diff --git a/examples/helloworld/tf_example5/README.md b/examples/deprecated/helloworld/tf_example5/README.md similarity index 94% rename from examples/helloworld/tf_example5/README.md rename to examples/deprecated/helloworld/tf_example5/README.md index 3c61674411b..cbb1605b9cd 100644 --- a/examples/helloworld/tf_example5/README.md +++ b/examples/deprecated/helloworld/tf_example5/README.md @@ -15,7 +15,7 @@ pip install -r requirements.txt ### 2. 
Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. -We also prepared related scripts in [TF image_recognition example](/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). +We also prepared related scripts in [TF image_recognition example](/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). ### 3. Download the FP32 model ```shell diff --git a/examples/helloworld/tf_example5/requirements.txt b/examples/deprecated/helloworld/tf_example5/requirements.txt similarity index 100% rename from examples/helloworld/tf_example5/requirements.txt rename to examples/deprecated/helloworld/tf_example5/requirements.txt diff --git a/examples/helloworld/tf_example5/test.py b/examples/deprecated/helloworld/tf_example5/test.py similarity index 100% rename from examples/helloworld/tf_example5/test.py rename to examples/deprecated/helloworld/tf_example5/test.py diff --git a/examples/helloworld/tf_example6/README.md b/examples/deprecated/helloworld/tf_example6/README.md similarity index 94% rename from examples/helloworld/tf_example6/README.md rename to examples/deprecated/helloworld/tf_example6/README.md index a711082128e..b43cf430d70 100644 --- a/examples/helloworld/tf_example6/README.md +++ b/examples/deprecated/helloworld/tf_example6/README.md @@ -15,7 +15,7 @@ pip install -r requirements.txt ### 2. Prepare Dataset TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format. -We also prepared related scripts in [TF image_recognition example](/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). +We also prepared related scripts in [TF image_recognition example](/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset). ### 3. 
Download the FP32 model ```shell diff --git a/examples/helloworld/tf_example6/requirements.txt b/examples/deprecated/helloworld/tf_example6/requirements.txt similarity index 100% rename from examples/helloworld/tf_example6/requirements.txt rename to examples/deprecated/helloworld/tf_example6/requirements.txt diff --git a/examples/helloworld/tf_example6/test.py b/examples/deprecated/helloworld/tf_example6/test.py similarity index 100% rename from examples/helloworld/tf_example6/test.py rename to examples/deprecated/helloworld/tf_example6/test.py diff --git a/examples/helloworld/tf_example7/README.md b/examples/deprecated/helloworld/tf_example7/README.md similarity index 100% rename from examples/helloworld/tf_example7/README.md rename to examples/deprecated/helloworld/tf_example7/README.md diff --git a/examples/helloworld/tf_example7/requirements.txt b/examples/deprecated/helloworld/tf_example7/requirements.txt similarity index 100% rename from examples/helloworld/tf_example7/requirements.txt rename to examples/deprecated/helloworld/tf_example7/requirements.txt diff --git a/examples/helloworld/tf_example7/test.py b/examples/deprecated/helloworld/tf_example7/test.py similarity index 100% rename from examples/helloworld/tf_example7/test.py rename to examples/deprecated/helloworld/tf_example7/test.py diff --git a/examples/helloworld/torch_static_quant/README.md b/examples/deprecated/helloworld/torch_static_quant/README.md similarity index 100% rename from examples/helloworld/torch_static_quant/README.md rename to examples/deprecated/helloworld/torch_static_quant/README.md diff --git a/examples/helloworld/torch_static_quant/quant_resnet18.py b/examples/deprecated/helloworld/torch_static_quant/quant_resnet18.py similarity index 100% rename from examples/helloworld/torch_static_quant/quant_resnet18.py rename to examples/deprecated/helloworld/torch_static_quant/quant_resnet18.py diff --git a/examples/helloworld/torch_static_quant/requirements.txt b/examples/deprecated/helloworld/torch_static_quant/requirements.txt similarity index 100% rename from examples/helloworld/torch_static_quant/requirements.txt rename to examples/deprecated/helloworld/torch_static_quant/requirements.txt diff --git a/examples/helloworld/torch_woq/README.md b/examples/deprecated/helloworld/torch_woq/README.md similarity index 100% rename from examples/helloworld/torch_woq/README.md rename to examples/deprecated/helloworld/torch_woq/README.md diff --git a/examples/helloworld/torch_woq/quant_mistral.py b/examples/deprecated/helloworld/torch_woq/quant_mistral.py similarity index 100% rename from examples/helloworld/torch_woq/quant_mistral.py rename to examples/deprecated/helloworld/torch_woq/quant_mistral.py diff --git a/examples/helloworld/torch_woq/requirements.txt b/examples/deprecated/helloworld/torch_woq/requirements.txt similarity index 100% rename from examples/helloworld/torch_woq/requirements.txt rename to examples/deprecated/helloworld/torch_woq/requirements.txt diff --git a/examples/helloworld/torch_woq_autoround/README.md b/examples/deprecated/helloworld/torch_woq_autoround/README.md similarity index 100% rename from examples/helloworld/torch_woq_autoround/README.md rename to examples/deprecated/helloworld/torch_woq_autoround/README.md diff --git a/examples/helloworld/torch_woq_autoround/requirements.txt b/examples/deprecated/helloworld/torch_woq_autoround/requirements.txt similarity index 100% rename from examples/helloworld/torch_woq_autoround/requirements.txt rename to 
examples/deprecated/helloworld/torch_woq_autoround/requirements.txt diff --git a/examples/helloworld/torch_woq_autoround/test.py b/examples/deprecated/helloworld/torch_woq_autoround/test.py similarity index 100% rename from examples/helloworld/torch_woq_autoround/test.py rename to examples/deprecated/helloworld/torch_woq_autoround/test.py diff --git a/examples/helloworld/train.py b/examples/deprecated/helloworld/train.py similarity index 100% rename from examples/helloworld/train.py rename to examples/deprecated/helloworld/train.py diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py b/examples/deprecated/keras/image_recognition/imagenet_prepare/build_imagenet_data.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py rename to examples/deprecated/keras/image_recognition/imagenet_prepare/build_imagenet_data.py diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh b/examples/deprecated/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh rename to examples/deprecated/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh b/examples/deprecated/keras/image_recognition/imagenet_prepare/download_imagenet.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh rename to examples/deprecated/keras/image_recognition/imagenet_prepare/download_imagenet.sh diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt b/examples/deprecated/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt rename to examples/deprecated/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt b/examples/deprecated/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt rename to examples/deprecated/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/prepare_model.py 
b/examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/prepare_model.py similarity index 97% rename from examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/prepare_model.py index f7ea841eef2..42441598eb8 100644 --- a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/prepare_model.py +++ b/examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/prepare_model.py @@ -1,35 +1,35 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import tensorflow as tf -def get_inception_resnet_v2_model(saved_path): - model = tf.keras.applications.InceptionResNetV2(weights='imagenet') - model.save(saved_path) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export pretained keras model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--output_model', - type=str, - help='path to exported model file') - - args = parser.parse_args() - get_inception_resnet_v2_model(args.output_model) +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +import tensorflow as tf +def get_inception_resnet_v2_model(saved_path): + model = tf.keras.applications.InceptionResNetV2(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_inception_resnet_v2_model(args.output_model) diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/inception_resnet_v2/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/inception_v3/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/inception_v3/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py b/examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt diff --git 
a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/mobilenet_v2/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/prepare_model.py b/examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/prepare_model.py similarity index 97% rename from examples/keras/image_recognition/mobilenet_v2/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/prepare_model.py index ae96dcb1859..e31b3e83de0 100644 --- a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/prepare_model.py +++ b/examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/prepare_model.py @@ -1,35 +1,35 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import argparse -from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 -def get_mobilenet_v2_model(saved_path): - model = MobileNetV2(weights='imagenet') - model.save(saved_path) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export pretained keras model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--output_model', - type=str, - help='path to exported model file') - - args = parser.parse_args() - get_mobilenet_v2_model(args.output_model) +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +from tensorflow.keras.applications.mobilenet_v2 import MobileNetV2 +def get_mobilenet_v2_model(saved_path): + model = MobileNetV2(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_mobilenet_v2_model(args.output_model) diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/prepare_dataset.sh b/examples/deprecated/keras/image_recognition/prepare_dataset.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/prepare_dataset.sh rename to examples/deprecated/keras/image_recognition/prepare_dataset.sh diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/resnet101/quantization/ptq/README.md rename to 
examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/resnet101/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/prepare_model.py b/examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/prepare_model.py similarity index 97% rename from examples/keras/image_recognition/resnet101/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/prepare_model.py index 552a0942157..3c16f5353be 100644 --- a/examples/keras/image_recognition/resnet101/quantization/ptq/prepare_model.py +++ b/examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/prepare_model.py @@ -1,35 +1,35 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -import tensorflow as tf -def get_resnet101_model(saved_path): - model = tf.keras.applications.ResNet101(weights='imagenet') - model.save(saved_path) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export pretained keras model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--output_model', - type=str, - help='path to exported model file') - - args = parser.parse_args() - get_resnet101_model(args.output_model) +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +import tensorflow as tf +def get_resnet101_model(saved_path): + model = tf.keras.applications.ResNet101(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_resnet101_model(args.output_model) diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/resnet101/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/resnet50/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/resnet50/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/prepare_model.py b/examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/prepare_model.py similarity index 100% rename from examples/keras/image_recognition/resnet50/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/prepare_model.py diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/resnet101/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/resnet50/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/run_benchmark.sh diff --git 
a/examples/keras/image_recognition/resnet50/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/resnet50/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/resnet50/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/resnet50_fashion/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/resnet50_fashion/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/resnet50_fashion_mnist_train.py b/examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/resnet50_fashion_mnist_train.py similarity index 100% rename from examples/keras/image_recognition/resnet50_fashion/quantization/ptq/resnet50_fashion_mnist_train.py rename to examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/resnet50_fashion_mnist_train.py diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/resnet50_fashion/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/resnet50_fashion/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/resnet50_fashion/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/resnet50_fashion/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/resnetv2_101/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/resnetv2_101/quantization/ptq/main.py rename to 
examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/prepare_model.py b/examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/prepare_model.py similarity index 100% rename from examples/keras/image_recognition/resnetv2_101/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/prepare_model.py diff --git a/examples/keras/image_recognition/resnet50/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/resnet50/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/resnet101/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/resnetv2_101/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/resnetv2_50/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/resnetv2_50/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py b/examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/prepare_model.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/prepare_model.py diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/resnetv2_101/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/inception_resnet_v2/quantization/ptq/run_benchmark.sh rename to 
examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/resnet101/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/resnet101/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/resnetv2_50/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/vgg16/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/vgg16/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/prepare_model.py b/examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/prepare_model.py similarity index 100% rename from examples/keras/image_recognition/vgg16/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/prepare_model.py diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/resnetv2_50/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/vgg16/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/vgg16/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/vgg16/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/vgg19/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/vgg19/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/prepare_model.py b/examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/prepare_model.py similarity index 100% rename from 
examples/keras/image_recognition/vgg19/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/prepare_model.py diff --git a/examples/keras/image_recognition/vgg16/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/vgg16/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/vgg19/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/vgg19/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/vgg19/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/xception/quantization/ptq/README.md b/examples/deprecated/keras/image_recognition/xception/quantization/ptq/README.md similarity index 100% rename from examples/keras/image_recognition/xception/quantization/ptq/README.md rename to examples/deprecated/keras/image_recognition/xception/quantization/ptq/README.md diff --git a/examples/keras/image_recognition/xception/quantization/ptq/main.py b/examples/deprecated/keras/image_recognition/xception/quantization/ptq/main.py similarity index 100% rename from examples/keras/image_recognition/xception/quantization/ptq/main.py rename to examples/deprecated/keras/image_recognition/xception/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/xception/quantization/ptq/prepare_model.py b/examples/deprecated/keras/image_recognition/xception/quantization/ptq/prepare_model.py similarity index 100% rename from examples/keras/image_recognition/xception/quantization/ptq/prepare_model.py rename to examples/deprecated/keras/image_recognition/xception/quantization/ptq/prepare_model.py diff --git a/examples/keras/image_recognition/vgg19/quantization/ptq/requirements.txt b/examples/deprecated/keras/image_recognition/xception/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/vgg19/quantization/ptq/requirements.txt rename to examples/deprecated/keras/image_recognition/xception/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/run_benchmark.sh b/examples/deprecated/keras/image_recognition/xception/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/resnetv2_50/quantization/ptq/run_benchmark.sh rename to examples/deprecated/keras/image_recognition/xception/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/run_quant.sh b/examples/deprecated/keras/image_recognition/xception/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/resnetv2_101/quantization/ptq/run_quant.sh rename to examples/deprecated/keras/image_recognition/xception/quantization/ptq/run_quant.sh diff --git 
a/examples/notebook/onnxruntime/Quick_Started_Notebook_of_INC_for_ONNXRuntime.ipynb b/examples/deprecated/notebook/onnxruntime/Quick_Started_Notebook_of_INC_for_ONNXRuntime.ipynb similarity index 100% rename from examples/notebook/onnxruntime/Quick_Started_Notebook_of_INC_for_ONNXRuntime.ipynb rename to examples/deprecated/notebook/onnxruntime/Quick_Started_Notebook_of_INC_for_ONNXRuntime.ipynb diff --git a/examples/notebook/onnxruntime/benchmark.py b/examples/deprecated/notebook/onnxruntime/benchmark.py similarity index 100% rename from examples/notebook/onnxruntime/benchmark.py rename to examples/deprecated/notebook/onnxruntime/benchmark.py diff --git a/examples/notebook/onnxruntime/requirements.txt b/examples/deprecated/notebook/onnxruntime/requirements.txt similarity index 91% rename from examples/notebook/onnxruntime/requirements.txt rename to examples/deprecated/notebook/onnxruntime/requirements.txt index 13f6ef9a8e1..c861daddc95 100644 --- a/examples/notebook/onnxruntime/requirements.txt +++ b/examples/deprecated/notebook/onnxruntime/requirements.txt @@ -1,12 +1,12 @@ -onnx -onnxruntime -onnxruntime-extensions -torch -transformers -accelerate -coloredlogs -sympy -numpy -sentencepiece -protobuf -optimum +onnx +onnxruntime +onnxruntime-extensions +torch +transformers +accelerate +coloredlogs +sympy +numpy +sentencepiece +protobuf +optimum diff --git a/examples/notebook/perf_fp32_int8_tf/README.md b/examples/deprecated/notebook/perf_fp32_int8_tf/README.md similarity index 100% rename from examples/notebook/perf_fp32_int8_tf/README.md rename to examples/deprecated/notebook/perf_fp32_int8_tf/README.md diff --git a/examples/notebook/perf_fp32_int8_tf/compare_result.py b/examples/deprecated/notebook/perf_fp32_int8_tf/compare_result.py similarity index 100% rename from examples/notebook/perf_fp32_int8_tf/compare_result.py rename to examples/deprecated/notebook/perf_fp32_int8_tf/compare_result.py diff --git a/examples/notebook/perf_fp32_int8_tf/run.sh b/examples/deprecated/notebook/perf_fp32_int8_tf/run.sh similarity index 100% rename from examples/notebook/perf_fp32_int8_tf/run.sh rename to examples/deprecated/notebook/perf_fp32_int8_tf/run.sh diff --git a/examples/notebook/perf_fp32_int8_tf/set_env.sh b/examples/deprecated/notebook/perf_fp32_int8_tf/set_env.sh similarity index 100% rename from examples/notebook/perf_fp32_int8_tf/set_env.sh rename to examples/deprecated/notebook/perf_fp32_int8_tf/set_env.sh diff --git a/examples/notebook/perf_fp32_int8_tf/test_performance.py b/examples/deprecated/notebook/perf_fp32_int8_tf/test_performance.py similarity index 100% rename from examples/notebook/perf_fp32_int8_tf/test_performance.py rename to examples/deprecated/notebook/perf_fp32_int8_tf/test_performance.py diff --git a/examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb b/examples/deprecated/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb similarity index 100% rename from examples/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb rename to examples/deprecated/notebook/pytorch/Quick_Started_Notebook_of_INC_for_Pytorch.ipynb diff --git a/examples/notebook/pytorch/requirements.txt b/examples/deprecated/notebook/pytorch/requirements.txt similarity index 100% rename from examples/notebook/pytorch/requirements.txt rename to examples/deprecated/notebook/pytorch/requirements.txt diff --git a/examples/notebook/tensorflow/alexnet_mnist/License.txt b/examples/deprecated/notebook/tensorflow/alexnet_mnist/License.txt similarity index 100% rename from 
examples/notebook/tensorflow/alexnet_mnist/License.txt rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/License.txt diff --git a/examples/notebook/tensorflow/alexnet_mnist/README.md b/examples/deprecated/notebook/tensorflow/alexnet_mnist/README.md similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/README.md rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/README.md diff --git a/examples/notebook/tensorflow/alexnet_mnist/alexnet.py b/examples/deprecated/notebook/tensorflow/alexnet_mnist/alexnet.py similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/alexnet.py rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/alexnet.py diff --git a/examples/notebook/tensorflow/alexnet_mnist/compare_perf.py b/examples/deprecated/notebook/tensorflow/alexnet_mnist/compare_perf.py similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/compare_perf.py rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/compare_perf.py diff --git a/examples/notebook/tensorflow/alexnet_mnist/conda_set_env.sh b/examples/deprecated/notebook/tensorflow/alexnet_mnist/conda_set_env.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/conda_set_env.sh rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/conda_set_env.sh diff --git a/examples/notebook/tensorflow/alexnet_mnist/devcloud_setup_env.sh b/examples/deprecated/notebook/tensorflow/alexnet_mnist/devcloud_setup_env.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/devcloud_setup_env.sh rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/devcloud_setup_env.sh diff --git a/examples/notebook/tensorflow/alexnet_mnist/inc_quantize_model.py b/examples/deprecated/notebook/tensorflow/alexnet_mnist/inc_quantize_model.py similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/inc_quantize_model.py rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/inc_quantize_model.py diff --git a/examples/notebook/tensorflow/alexnet_mnist/inc_sample_for_tensorflow.ipynb b/examples/deprecated/notebook/tensorflow/alexnet_mnist/inc_sample_for_tensorflow.ipynb similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/inc_sample_for_tensorflow.ipynb rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/inc_sample_for_tensorflow.ipynb diff --git a/examples/notebook/tensorflow/alexnet_mnist/keras_tf_train_mnist.py b/examples/deprecated/notebook/tensorflow/alexnet_mnist/keras_tf_train_mnist.py similarity index 97% rename from examples/notebook/tensorflow/alexnet_mnist/keras_tf_train_mnist.py rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/keras_tf_train_mnist.py index 1b0201b5eff..bbf5213ce48 100644 --- a/examples/notebook/tensorflow/alexnet_mnist/keras_tf_train_mnist.py +++ b/examples/deprecated/notebook/tensorflow/alexnet_mnist/keras_tf_train_mnist.py @@ -40,7 +40,7 @@ width = 28 channels = 1 -model = alexnet.create_model(width ,channels ,classes) +model = alexnet.create_model(width, channels, classes) model.summary() diff --git a/examples/notebook/tensorflow/alexnet_mnist/mnist_dataset.py b/examples/deprecated/notebook/tensorflow/alexnet_mnist/mnist_dataset.py similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/mnist_dataset.py rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/mnist_dataset.py diff --git 
a/examples/notebook/tensorflow/alexnet_mnist/pip_set_env.sh b/examples/deprecated/notebook/tensorflow/alexnet_mnist/pip_set_env.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/pip_set_env.sh rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/pip_set_env.sh diff --git a/examples/notebook/tensorflow/alexnet_mnist/profiling_inc.py b/examples/deprecated/notebook/tensorflow/alexnet_mnist/profiling_inc.py similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/profiling_inc.py rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/profiling_inc.py diff --git a/examples/notebook/tensorflow/alexnet_mnist/requirements.txt b/examples/deprecated/notebook/tensorflow/alexnet_mnist/requirements.txt similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/requirements.txt rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/requirements.txt diff --git a/examples/notebook/tensorflow/alexnet_mnist/run_in_intel_devcloud.sh b/examples/deprecated/notebook/tensorflow/alexnet_mnist/run_in_intel_devcloud.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/run_in_intel_devcloud.sh rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/run_in_intel_devcloud.sh diff --git a/examples/notebook/tensorflow/alexnet_mnist/run_jupyter.sh b/examples/deprecated/notebook/tensorflow/alexnet_mnist/run_jupyter.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/run_jupyter.sh rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/run_jupyter.sh diff --git a/examples/notebook/tensorflow/alexnet_mnist/run_sample.sh b/examples/deprecated/notebook/tensorflow/alexnet_mnist/run_sample.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/alexnet_mnist/run_sample.sh rename to examples/deprecated/notebook/tensorflow/alexnet_mnist/run_sample.sh diff --git a/examples/notebook/tensorflow/resnet/README.md b/examples/deprecated/notebook/tensorflow/resnet/README.md similarity index 100% rename from examples/notebook/tensorflow/resnet/README.md rename to examples/deprecated/notebook/tensorflow/resnet/README.md diff --git a/examples/notebook/tensorflow/resnet/requirements.txt b/examples/deprecated/notebook/tensorflow/resnet/requirements.txt similarity index 93% rename from examples/notebook/tensorflow/resnet/requirements.txt rename to examples/deprecated/notebook/tensorflow/resnet/requirements.txt index a2d431e0ace..9b7783d9c34 100644 --- a/examples/notebook/tensorflow/resnet/requirements.txt +++ b/examples/deprecated/notebook/tensorflow/resnet/requirements.txt @@ -1,8 +1,8 @@ -numpy -neural-compressor -tensorflow -datasets -requests -urllib3 -pyOpenSSL -git+https://github.com/huggingface/huggingface_hub +numpy +neural-compressor +tensorflow +datasets +requests +urllib3 +pyOpenSSL +git+https://github.com/huggingface/huggingface_hub diff --git a/examples/notebook/tensorflow/resnet/resnet_benchmark.py b/examples/deprecated/notebook/tensorflow/resnet/resnet_benchmark.py similarity index 100% rename from examples/notebook/tensorflow/resnet/resnet_benchmark.py rename to examples/deprecated/notebook/tensorflow/resnet/resnet_benchmark.py diff --git a/examples/notebook/tensorflow/resnet/resnet_quantization.ipynb b/examples/deprecated/notebook/tensorflow/resnet/resnet_quantization.ipynb similarity index 100% rename from 
examples/notebook/tensorflow/resnet/resnet_quantization.ipynb rename to examples/deprecated/notebook/tensorflow/resnet/resnet_quantization.ipynb diff --git a/examples/notebook/tensorflow/vgg19_ibean/README.md b/examples/deprecated/notebook/tensorflow/vgg19_ibean/README.md similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/README.md rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/README.md diff --git a/examples/notebook/tensorflow/vgg19_ibean/compare_perf.py b/examples/deprecated/notebook/tensorflow/vgg19_ibean/compare_perf.py similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/compare_perf.py rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/compare_perf.py diff --git a/examples/notebook/tensorflow/vgg19_ibean/conda_set_env.sh b/examples/deprecated/notebook/tensorflow/vgg19_ibean/conda_set_env.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/conda_set_env.sh rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/conda_set_env.sh diff --git a/examples/notebook/tensorflow/vgg19_ibean/devcloud_setup_env.sh b/examples/deprecated/notebook/tensorflow/vgg19_ibean/devcloud_setup_env.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/devcloud_setup_env.sh rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/devcloud_setup_env.sh diff --git a/examples/notebook/tensorflow/vgg19_ibean/inc_quantize_model.py b/examples/deprecated/notebook/tensorflow/vgg19_ibean/inc_quantize_model.py similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/inc_quantize_model.py rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/inc_quantize_model.py diff --git a/examples/notebook/tensorflow/vgg19_ibean/inc_quantize_vgg19.ipynb b/examples/deprecated/notebook/tensorflow/vgg19_ibean/inc_quantize_vgg19.ipynb similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/inc_quantize_vgg19.ipynb rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/inc_quantize_vgg19.ipynb diff --git a/examples/notebook/tensorflow/vgg19_ibean/pip_set_env.sh b/examples/deprecated/notebook/tensorflow/vgg19_ibean/pip_set_env.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/pip_set_env.sh rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/pip_set_env.sh diff --git a/examples/notebook/tensorflow/vgg19_ibean/profiling_inc.py b/examples/deprecated/notebook/tensorflow/vgg19_ibean/profiling_inc.py similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/profiling_inc.py rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/profiling_inc.py diff --git a/examples/notebook/tensorflow/vgg19_ibean/requirements.txt b/examples/deprecated/notebook/tensorflow/vgg19_ibean/requirements.txt similarity index 92% rename from examples/notebook/tensorflow/vgg19_ibean/requirements.txt rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/requirements.txt index e866fcd37f6..eedb174ddfa 100644 --- a/examples/notebook/tensorflow/vgg19_ibean/requirements.txt +++ b/examples/deprecated/notebook/tensorflow/vgg19_ibean/requirements.txt @@ -1,5 +1,5 @@ -numpy -matplotlib -tensorflow -tensorflow-hub -tensorflow-datasets +numpy +matplotlib +tensorflow +tensorflow-hub +tensorflow-datasets diff --git a/examples/notebook/tensorflow/vgg19_ibean/run_in_intel_devcloud.sh 
b/examples/deprecated/notebook/tensorflow/vgg19_ibean/run_in_intel_devcloud.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/run_in_intel_devcloud.sh rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/run_in_intel_devcloud.sh diff --git a/examples/notebook/tensorflow/vgg19_ibean/run_jupyter.sh b/examples/deprecated/notebook/tensorflow/vgg19_ibean/run_jupyter.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/run_jupyter.sh rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/run_jupyter.sh diff --git a/examples/notebook/tensorflow/vgg19_ibean/run_sample.sh b/examples/deprecated/notebook/tensorflow/vgg19_ibean/run_sample.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/run_sample.sh rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/run_sample.sh diff --git a/examples/notebook/tensorflow/vgg19_ibean/train_model.py b/examples/deprecated/notebook/tensorflow/vgg19_ibean/train_model.py similarity index 100% rename from examples/notebook/tensorflow/vgg19_ibean/train_model.py rename to examples/deprecated/notebook/tensorflow/vgg19_ibean/train_model.py diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/README.md diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/main.py similarity index 97% rename from examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/main.py index 365280b6471..df5e735fdda 100644 --- a/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/main.py +++ b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/main.py @@ -1,214 +1,214 @@ -import cv2 -import onnx -import os -import numpy as np -from sklearn.model_selection import KFold -import sklearn -import pickle -import logging -import argparse -import onnxruntime as ort - -logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) - -def load_bin(path, image_size): - bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes') - data_list = [] - for flip in [0,1]: - data = np.empty((len(issame_list)*2, 3, image_size[0], image_size[1])) - data_list.append(data) - for i in range(len(issame_list)*2): - _bin = bins[i] - img = cv2.imdecode(np.frombuffer(_bin, np.uint8), -1) - img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = np.transpose(img, axes=(2, 0, 1)) - for flip in [0,1]: - if flip==1: - img = np.flip(img, axis=2) - data_list[flip][i][:] = img - data = np.concatenate(data_list) - return (data.astype('float32'), issame_list) - -def load_property(data_dir): - for line in open(os.path.join(os.path.split(data_dir)[0], 'property')): - vec = line.strip().split(',') - assert len(vec)==3 - image_size = [int(vec[1]), 
int(vec[2])] - return image_size - -class LFold: - def __init__(self, n_splits = 2, shuffle = False): - self.n_splits = n_splits - if self.n_splits>1: - self.k_fold = KFold(n_splits = n_splits, shuffle = shuffle) - - def split(self, indices): - if self.n_splits>1: - return self.k_fold.split(indices) - else: - return [(indices, indices)] - -def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds): - assert(embeddings1.shape[0] == embeddings2.shape[0]) - assert(embeddings1.shape[1] == embeddings2.shape[1]) - nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) - nrof_thresholds = len(thresholds) - k_fold = LFold(n_splits=nrof_folds, shuffle=False) - - accuracy = np.zeros((nrof_folds)) - indices = np.arange(nrof_pairs) - - diff = np.subtract(embeddings1, embeddings2) - dist = np.sum(np.square(diff),1) - - for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): - # Find the best threshold for the fold - acc_train = np.zeros((nrof_thresholds)) - for threshold_idx, threshold in enumerate(thresholds): - acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) - best_threshold_index = np.argmax(acc_train) - accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) - - return accuracy - -def calculate_accuracy(threshold, dist, actual_issame): - predict_issame = np.less(dist, threshold) - tp = np.sum(np.logical_and(predict_issame, actual_issame)) - tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) - - acc = float(tp+tn)/dist.size - return acc - -class Metric: - def __init__(self, nfolds): - self.embeddings = [] - self.actual_issame = None - self.nfolds = nfolds - - def update(self, preds, labels): - if self.actual_issame is None: - self.actual_issame = np.asarray(labels) - self.actual_issame = self.actual_issame.squeeze() - self.embeddings.append(preds[0]) - - def reset(self): - self.embeddings = [] - self.actual_issame = None - - def result(self): - embeddings_list = np.array(self.embeddings) - num = embeddings_list.shape[0] - embeddings = embeddings_list[:num//2] + embeddings_list[num//2:] - embeddings = embeddings.squeeze() - embeddings = sklearn.preprocessing.normalize(embeddings) - thresholds = np.arange(0, 4, 0.02) - embeddings1 = embeddings[0::2] - embeddings2 = embeddings[1::2] - accuracy = calculate_roc(thresholds, embeddings1, embeddings2, - np.asarray(self.actual_issame), nrof_folds=self.nfolds) - return np.mean(accuracy) - -class Dataloader: - def __init__(self, data_dir, batch_size): - self.batch_size = batch_size - path = os.path.join(data_dir) - # Load data - if os.path.exists(path): - data_set = load_bin(path, image_size) - self.data_list = data_set[0] - self.issame_list = data_set[1] - - def __iter__(self): - for data in self.data_list: - yield np.expand_dims(data, axis=0), self.issame_list - -def eval_func(model, dataloader, metric): - metric.reset() - sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - ort_inputs = {} - input_names = [i.name for i in sess.get_inputs()] - for input_data, label in dataloader: - output = sess.run(None, dict(zip(input_names, [input_data]))) - metric.update(output, label) - return metric.result() - -if __name__ == '__main__': - logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") - parser = argparse.ArgumentParser( - description="ArcFace fine-tune examples for face recognition/verification.", - 
formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - '--model_path', - type=str, - help="Pre-trained model on onnx file" - ) - parser.add_argument( - '--dataset_location', - type=str, - help="Imagenet data path" - ) - parser.add_argument( - '--benchmark', - action='store_true', \ - default=False - ) - parser.add_argument( - '--tune', - action='store_true', \ - default=False, - help="whether quantize the model" - ) - parser.add_argument( - '--output_model', - type=str, - help="output model path" - ) - parser.add_argument( - '--nfolds', - type=int, - default=1, - ) - parser.add_argument( - '--mode', - type=str, - help="benchmark mode of performance or accuracy" - ) - parser.add_argument( - "--batch_size", - default=1, - type=int, - ) - args = parser.parse_args() - # Load image size - image_size = load_property(args.dataset_location) - print('image_size', image_size) - - dataloader = Dataloader(args.dataset_location, args.batch_size) - model = onnx.load(args.model_path) - metric = Metric(args.nfolds) - def eval(onnx_model): - return eval_func(onnx_model, dataloader, metric) - - if args.benchmark: - if args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=1000, cores_per_instance=4, num_of_instance=1) - fit(model, conf, b_dataloader=dataloader) - elif args.mode == 'accuracy': - acc_result = eval(model) - print("Batch size = %d" % dataloader.batch_size) - print("Accuracy: %.5f" % acc_result) - - if args.tune: - from neural_compressor import quantization, PostTrainingQuantConfig - config = PostTrainingQuantConfig(approach='static') - - q_model = quantization.fit(model, config, calib_dataloader=dataloader, - eval_func=eval) - - q_model.save(args.output_model) +import cv2 +import onnx +import os +import numpy as np +from sklearn.model_selection import KFold +import sklearn +import pickle +import logging +import argparse +import onnxruntime as ort + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) + +def load_bin(path, image_size): + bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes') + data_list = [] + for flip in [0,1]: + data = np.empty((len(issame_list)*2, 3, image_size[0], image_size[1])) + data_list.append(data) + for i in range(len(issame_list)*2): + _bin = bins[i] + img = cv2.imdecode(np.frombuffer(_bin, np.uint8), -1) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = np.transpose(img, axes=(2, 0, 1)) + for flip in [0,1]: + if flip==1: + img = np.flip(img, axis=2) + data_list[flip][i][:] = img + data = np.concatenate(data_list) + return (data.astype('float32'), issame_list) + +def load_property(data_dir): + for line in open(os.path.join(os.path.split(data_dir)[0], 'property')): + vec = line.strip().split(',') + assert len(vec)==3 + image_size = [int(vec[1]), int(vec[2])] + return image_size + +class LFold: + def __init__(self, n_splits = 2, shuffle = False): + self.n_splits = n_splits + if self.n_splits>1: + self.k_fold = KFold(n_splits = n_splits, shuffle = shuffle) + + def split(self, indices): + if self.n_splits>1: + return self.k_fold.split(indices) + else: + return [(indices, indices)] + +def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds): + assert(embeddings1.shape[0] == embeddings2.shape[0]) + assert(embeddings1.shape[1] == embeddings2.shape[1]) + 
nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) + nrof_thresholds = len(thresholds) + k_fold = LFold(n_splits=nrof_folds, shuffle=False) + + accuracy = np.zeros((nrof_folds)) + indices = np.arange(nrof_pairs) + + diff = np.subtract(embeddings1, embeddings2) + dist = np.sum(np.square(diff),1) + + for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): + # Find the best threshold for the fold + acc_train = np.zeros((nrof_thresholds)) + for threshold_idx, threshold in enumerate(thresholds): + acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) + best_threshold_index = np.argmax(acc_train) + accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) + + return accuracy + +def calculate_accuracy(threshold, dist, actual_issame): + predict_issame = np.less(dist, threshold) + tp = np.sum(np.logical_and(predict_issame, actual_issame)) + tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) + + acc = float(tp+tn)/dist.size + return acc + +class Metric: + def __init__(self, nfolds): + self.embeddings = [] + self.actual_issame = None + self.nfolds = nfolds + + def update(self, preds, labels): + if self.actual_issame is None: + self.actual_issame = np.asarray(labels) + self.actual_issame = self.actual_issame.squeeze() + self.embeddings.append(preds[0]) + + def reset(self): + self.embeddings = [] + self.actual_issame = None + + def result(self): + embeddings_list = np.array(self.embeddings) + num = embeddings_list.shape[0] + embeddings = embeddings_list[:num//2] + embeddings_list[num//2:] + embeddings = embeddings.squeeze() + embeddings = sklearn.preprocessing.normalize(embeddings) + thresholds = np.arange(0, 4, 0.02) + embeddings1 = embeddings[0::2] + embeddings2 = embeddings[1::2] + accuracy = calculate_roc(thresholds, embeddings1, embeddings2, + np.asarray(self.actual_issame), nrof_folds=self.nfolds) + return np.mean(accuracy) + +class Dataloader: + def __init__(self, data_dir, batch_size): + self.batch_size = batch_size + path = os.path.join(data_dir) + # Load data + if os.path.exists(path): + data_set = load_bin(path, image_size) + self.data_list = data_set[0] + self.issame_list = data_set[1] + + def __iter__(self): + for data in self.data_list: + yield np.expand_dims(data, axis=0), self.issame_list + +def eval_func(model, dataloader, metric): + metric.reset() + sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + ort_inputs = {} + input_names = [i.name for i in sess.get_inputs()] + for input_data, label in dataloader: + output = sess.run(None, dict(zip(input_names, [input_data]))) + metric.update(output, label) + return metric.result() + +if __name__ == '__main__': + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") + parser = argparse.ArgumentParser( + description="ArcFace fine-tune examples for face recognition/verification.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + '--model_path', + type=str, + help="Pre-trained ONNX model file" + ) + parser.add_argument( + '--dataset_location', + type=str, + help="Imagenet data path" + ) + parser.add_argument( + '--benchmark', + action='store_true', \ + default=False + ) + parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether to quantize the model" + ) + parser.add_argument( + '--output_model', + type=str, + help="output model path" + ) + 
parser.add_argument( + '--nfolds', + type=int, + default=1, + ) + parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + "--batch_size", + default=1, + type=int, + ) + args = parser.parse_args() + # Load image size + image_size = load_property(args.dataset_location) + print('image_size', image_size) + + dataloader = Dataloader(args.dataset_location, args.batch_size) + model = onnx.load(args.model_path) + metric = Metric(args.nfolds) + def eval(onnx_model): + return eval_func(onnx_model, dataloader, metric) + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=1000, cores_per_instance=4, num_of_instance=1) + fit(model, conf, b_dataloader=dataloader) + elif args.mode == 'accuracy': + acc_result = eval(model) + print("Batch size = %d" % dataloader.batch_size) + print("Accuracy: %.5f" % acc_result) + + if args.tune: + from neural_compressor import quantization, PostTrainingQuantConfig + config = PostTrainingQuantConfig(approach='static') + + q_model = quantization.fit(model, config, calib_dataloader=dataloader, + eval_func=eval) + + q_model.save(args.output_model) diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/arcface/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/README.md rename to 
examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/README.md diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/main.py diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/emotion_ferplus/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/README.md diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/main.py diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/prepare_model.py 
b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/wider_easy_val.mat b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/wider_easy_val.mat similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/wider_easy_val.mat rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/wider_easy_val.mat diff --git a/examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/wider_face_val.mat b/examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/wider_face_val.mat similarity index 100% rename from examples/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/wider_face_val.mat rename to examples/deprecated/onnxrt/body_analysis/onnx_model_zoo/ultraface/quantization/ptq_static/wider_face_val.mat diff --git a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/beit/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/beit_modeling_finetune.py b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/beit_modeling_finetune.py similarity index 97% rename from examples/onnxrt/image_recognition/beit/quantization/ptq_static/beit_modeling_finetune.py rename to examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/beit_modeling_finetune.py index 7e9cd599893..6bac842d81b 100644 --- 
a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/beit_modeling_finetune.py +++ b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/beit_modeling_finetune.py @@ -1,420 +1,420 @@ -# -------------------------------------------------------- -# BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) -# Github source: https://github.com/microsoft/unilm/tree/master/beit -# Copyright (c) 2021 Microsoft -# Licensed under The MIT License [see LICENSE for details] -# By Hangbo Bao -# Based on timm and DeiT code bases -# https://github.com/rwightman/pytorch-image-models/tree/master/timm -# https://github.com/facebookresearch/deit/ -# https://github.com/facebookresearch/dino -# --------------------------------------------------------' -import math -from functools import partial - -import torch -import torch.nn as nn -import torch.nn.functional as F -from timm.models.layers import drop_path, to_2tuple, trunc_normal_ -from timm.models.registry import register_model - - -def _cfg(url='', **kwargs): - return { - 'url': url, - 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None, - 'crop_pct': .9, 'interpolation': 'bicubic', - 'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5), - **kwargs - } - - -class DropPath(nn.Module): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - """ - def __init__(self, drop_prob=None): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, x): - return drop_path(x, self.drop_prob, self.training) - - def extra_repr(self) -> str: - return 'p={}'.format(self.drop_prob) - - -class Mlp(nn.Module): - def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - # x = self.drop(x) - # commit this for the original BERT implement - x = self.fc2(x) - x = self.drop(x) - return x - - -class Attention(nn.Module): - def __init__( - self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., - proj_drop=0., window_size=None, attn_head_dim=None): - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - if attn_head_dim is not None: - head_dim = attn_head_dim - all_head_dim = head_dim * self.num_heads - self.scale = qk_scale or head_dim ** -0.5 - - self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False) - if qkv_bias: - self.q_bias = nn.Parameter(torch.zeros(all_head_dim)) - self.v_bias = nn.Parameter(torch.zeros(all_head_dim)) - else: - self.q_bias = None - self.v_bias = None - - if window_size: - self.window_size = window_size - self.num_relative_distance = (2 * window_size[0] - 1) * (2 * window_size[1] - 1) + 3 - self.relative_position_bias_table = nn.Parameter( - torch.zeros(self.num_relative_distance, num_heads)) # 2*Wh-1 * 2*Ww-1, nH - # cls to token & token 2 cls & cls to cls - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(window_size[0]) - coords_w = torch.arange(window_size[1]) - coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww - coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, 
None, :] # 2, Wh*Ww, Wh*Ww - relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 - relative_coords[:, :, 0] += window_size[0] - 1 # shift to start from 0 - relative_coords[:, :, 1] += window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * window_size[1] - 1 - relative_position_index = \ - torch.zeros(size=(window_size[0] * window_size[1] + 1, ) * 2, dtype=relative_coords.dtype) - relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww - relative_position_index[0, 0:] = self.num_relative_distance - 3 - relative_position_index[0:, 0] = self.num_relative_distance - 2 - relative_position_index[0, 0] = self.num_relative_distance - 1 - - self.register_buffer("relative_position_index", relative_position_index) - else: - self.window_size = None - self.relative_position_bias_table = None - self.relative_position_index = None - - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(all_head_dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - - def forward(self, x, rel_pos_bias=None): - B, N, C = x.shape - qkv_bias = None - if self.q_bias is not None: - qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias)) - # qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) - qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) - qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) - q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) - - q = q * self.scale - attn = (q @ k.transpose(-2, -1)) - - if self.relative_position_bias_table is not None: - relative_position_bias = \ - self.relative_position_bias_table[self.relative_position_index.view(-1)].view( - self.window_size[0] * self.window_size[1] + 1, - self.window_size[0] * self.window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH - relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww - attn = attn + relative_position_bias.unsqueeze(0) - - if rel_pos_bias is not None: - attn = attn + rel_pos_bias - - attn = attn.softmax(dim=-1) - attn = self.attn_drop(attn) - - x = (attn @ v).transpose(1, 2).reshape(B, N, -1) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Block(nn.Module): - - def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., - drop_path=0., init_values=None, act_layer=nn.GELU, norm_layer=nn.LayerNorm, - window_size=None, attn_head_dim=None): - super().__init__() - self.norm1 = norm_layer(dim) - self.attn = Attention( - dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, - attn_drop=attn_drop, proj_drop=drop, window_size=window_size, attn_head_dim=attn_head_dim) - # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) - - if init_values is not None and init_values > 0: - self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)),requires_grad=True) - self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)),requires_grad=True) - else: - self.gamma_1, self.gamma_2 = None, None - - def forward(self, x, rel_pos_bias=None): - if self.gamma_1 is None: - x = x + self.drop_path(self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)) - x = x + self.drop_path(self.mlp(self.norm2(x))) - else: - x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)) - x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x))) - return x - - -class PatchEmbed(nn.Module): - """ Image to Patch Embedding - """ - def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) - self.patch_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) - self.img_size = img_size - self.patch_size = patch_size - self.num_patches = num_patches - - self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) - - def forward(self, x, **kwargs): - B, C, H, W = x.shape - # FIXME look at relaxing size constraints - assert H == self.img_size[0] and W == self.img_size[1], \ - f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." - x = self.proj(x).flatten(2).transpose(1, 2) - return x - - -class RelativePositionBias(nn.Module): - - def __init__(self, window_size, num_heads): - super().__init__() - self.window_size = window_size - self.num_relative_distance = (2 * window_size[0] - 1) * (2 * window_size[1] - 1) + 3 - self.relative_position_bias_table = nn.Parameter( - torch.zeros(self.num_relative_distance, num_heads)) # 2*Wh-1 * 2*Ww-1, nH - # cls to token & token 2 cls & cls to cls - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(window_size[0]) - coords_w = torch.arange(window_size[1]) - coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww - coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww - relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 - relative_coords[:, :, 0] += window_size[0] - 1 # shift to start from 0 - relative_coords[:, :, 1] += window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * window_size[1] - 1 - relative_position_index = \ - torch.zeros(size=(window_size[0] * window_size[1] + 1,) * 2, dtype=relative_coords.dtype) - relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww - relative_position_index[0, 0:] = self.num_relative_distance - 3 - relative_position_index[0:, 0] = self.num_relative_distance - 2 - relative_position_index[0, 0] = self.num_relative_distance - 1 - - self.register_buffer("relative_position_index", relative_position_index) - - # trunc_normal_(self.relative_position_bias_table, std=.02) - - def forward(self): - relative_position_bias = \ - self.relative_position_bias_table[self.relative_position_index.view(-1)].view( - self.window_size[0] * self.window_size[1] + 1, - self.window_size[0] * self.window_size[1] + 1, -1) # 
Wh*Ww,Wh*Ww,nH - return relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww - - -class VisionTransformer(nn.Module): - """ Vision Transformer with support for patch or hybrid CNN input stage - """ - def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, - num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., - drop_path_rate=0., norm_layer=nn.LayerNorm, init_values=None, - use_abs_pos_emb=True, use_rel_pos_bias=False, use_shared_rel_pos_bias=False, - use_mean_pooling=True, init_scale=0.001, pretrained_cfg=None, pretrained_cfg_overlay=None): - super().__init__() - self.num_classes = num_classes - self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models - - self.patch_embed = PatchEmbed( - img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) - num_patches = self.patch_embed.num_patches - - self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) - # self.mask_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) - if use_abs_pos_emb: - self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) - else: - self.pos_embed = None - self.pos_drop = nn.Dropout(p=drop_rate) - - if use_shared_rel_pos_bias: - self.rel_pos_bias = RelativePositionBias(window_size=self.patch_embed.patch_shape, num_heads=num_heads) - else: - self.rel_pos_bias = None - - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule - self.use_rel_pos_bias = use_rel_pos_bias - self.blocks = nn.ModuleList([ - Block( - dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, - init_values=init_values, window_size=self.patch_embed.patch_shape if use_rel_pos_bias else None) - for i in range(depth)]) - self.norm = nn.Identity() if use_mean_pooling else norm_layer(embed_dim) - self.fc_norm = norm_layer(embed_dim) if use_mean_pooling else None - self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity() - - if self.pos_embed is not None: - trunc_normal_(self.pos_embed, std=.02) - trunc_normal_(self.cls_token, std=.02) - # trunc_normal_(self.mask_token, std=.02) - if isinstance(self.head, nn.Linear): - trunc_normal_(self.head.weight, std=.02) - self.apply(self._init_weights) - self.fix_init_weight() - - if isinstance(self.head, nn.Linear): - self.head.weight.data.mul_(init_scale) - self.head.bias.data.mul_(init_scale) - - def fix_init_weight(self): - def rescale(param, layer_id): - param.div_(math.sqrt(2.0 * layer_id)) - - for layer_id, layer in enumerate(self.blocks): - rescale(layer.attn.proj.weight.data, layer_id + 1) - rescale(layer.mlp.fc2.weight.data, layer_id + 1) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight, std=.02) - if isinstance(m, nn.Linear) and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) - - def get_num_layers(self): - return len(self.blocks) - - @torch.jit.ignore - def no_weight_decay(self): - return {'pos_embed', 'cls_token'} - - def get_classifier(self): - return self.head - - def reset_classifier(self, num_classes, global_pool=''): - self.num_classes = num_classes - self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() - - def forward_features(self, x): - x = 
self.patch_embed(x) - batch_size, seq_len, _ = x.size() - - cls_tokens = self.cls_token.expand(batch_size, -1, -1) # stole cls_tokens impl from Phil Wang, thanks - x = torch.cat((cls_tokens, x), dim=1) - if self.pos_embed is not None: - x = x + self.pos_embed - x = self.pos_drop(x) - - rel_pos_bias = self.rel_pos_bias() if self.rel_pos_bias is not None else None - for blk in self.blocks: - x = blk(x, rel_pos_bias=rel_pos_bias) - - x = self.norm(x) - if self.fc_norm is not None: - t = x[:, 1:, :] - return self.fc_norm(t.mean(1)) - else: - return x[:, 0] - - def forward(self, x): - x = self.forward_features(x) - x = self.head(x) - return x - - def get_intermediate_layers(self, x): - x = self.patch_embed(x) - batch_size, seq_len, _ = x.size() - - cls_tokens = self.cls_token.expand(batch_size, -1, -1) # stole cls_tokens impl from Phil Wang, thanks - x = torch.cat((cls_tokens, x), dim=1) - if self.pos_embed is not None: - x = x + self.pos_embed - x = self.pos_drop(x) - - features = [] - rel_pos_bias = self.rel_pos_bias() if self.rel_pos_bias is not None else None - for blk in self.blocks: - x = blk(x, rel_pos_bias) - features.append(x) - - return features - - -@register_model -def beit_base_patch16_224(pretrained=False, **kwargs): - model = VisionTransformer( - patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) - model.default_cfg = _cfg() - return model - - -@register_model -def beit_base_patch16_384(pretrained=False, **kwargs): - model = VisionTransformer( - img_size=384, patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) - model.default_cfg = _cfg() - return model - - -@register_model -def beit_large_patch16_224(pretrained=False, **kwargs): - model = VisionTransformer( - patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) - model.default_cfg = _cfg() - return model - - -@register_model -def beit_large_patch16_384(pretrained=False, **kwargs): - model = VisionTransformer( - img_size=384, patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) - model.default_cfg = _cfg() - return model - - -@register_model -def beit_large_patch16_512(pretrained=False, **kwargs): - model = VisionTransformer( - img_size=512, patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) - model.default_cfg = _cfg() - return model +# -------------------------------------------------------- +# BEIT: BERT Pre-Training of Image Transformers (https://arxiv.org/abs/2106.08254) +# Github source: https://github.com/microsoft/unilm/tree/master/beit +# Copyright (c) 2021 Microsoft +# Licensed under The MIT License [see LICENSE for details] +# By Hangbo Bao +# Based on timm and DeiT code bases +# https://github.com/rwightman/pytorch-image-models/tree/master/timm +# https://github.com/facebookresearch/deit/ +# https://github.com/facebookresearch/dino +# --------------------------------------------------------' +import math +from functools import partial + +import torch +import torch.nn as nn +import torch.nn.functional as F +from timm.models.layers import drop_path, to_2tuple, trunc_normal_ +from timm.models.registry import register_model + + +def _cfg(url='', **kwargs): + return { + 'url': url, + 'num_classes': 
1000, 'input_size': (3, 224, 224), 'pool_size': None, + 'crop_pct': .9, 'interpolation': 'bicubic', + 'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5), + **kwargs + } + + +class DropPath(nn.Module): + """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). + """ + def __init__(self, drop_prob=None): + super(DropPath, self).__init__() + self.drop_prob = drop_prob + + def forward(self, x): + return drop_path(x, self.drop_prob, self.training) + + def extra_repr(self) -> str: + return 'p={}'.format(self.drop_prob) + + +class Mlp(nn.Module): + def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): + super().__init__() + out_features = out_features or in_features + hidden_features = hidden_features or in_features + self.fc1 = nn.Linear(in_features, hidden_features) + self.act = act_layer() + self.fc2 = nn.Linear(hidden_features, out_features) + self.drop = nn.Dropout(drop) + + def forward(self, x): + x = self.fc1(x) + x = self.act(x) + # x = self.drop(x) + # commit this for the original BERT implement + x = self.fc2(x) + x = self.drop(x) + return x + + +class Attention(nn.Module): + def __init__( + self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., + proj_drop=0., window_size=None, attn_head_dim=None): + super().__init__() + self.num_heads = num_heads + head_dim = dim // num_heads + if attn_head_dim is not None: + head_dim = attn_head_dim + all_head_dim = head_dim * self.num_heads + self.scale = qk_scale or head_dim ** -0.5 + + self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False) + if qkv_bias: + self.q_bias = nn.Parameter(torch.zeros(all_head_dim)) + self.v_bias = nn.Parameter(torch.zeros(all_head_dim)) + else: + self.q_bias = None + self.v_bias = None + + if window_size: + self.window_size = window_size + self.num_relative_distance = (2 * window_size[0] - 1) * (2 * window_size[1] - 1) + 3 + self.relative_position_bias_table = nn.Parameter( + torch.zeros(self.num_relative_distance, num_heads)) # 2*Wh-1 * 2*Ww-1, nH + # cls to token & token 2 cls & cls to cls + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(window_size[0]) + coords_w = torch.arange(window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * window_size[1] - 1 + relative_position_index = \ + torch.zeros(size=(window_size[0] * window_size[1] + 1, ) * 2, dtype=relative_coords.dtype) + relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + relative_position_index[0, 0:] = self.num_relative_distance - 3 + relative_position_index[0:, 0] = self.num_relative_distance - 2 + relative_position_index[0, 0] = self.num_relative_distance - 1 + + self.register_buffer("relative_position_index", relative_position_index) + else: + self.window_size = None + self.relative_position_bias_table = None + self.relative_position_index = None + + self.attn_drop = nn.Dropout(attn_drop) + self.proj = nn.Linear(all_head_dim, dim) + self.proj_drop = nn.Dropout(proj_drop) + + def forward(self, x, rel_pos_bias=None): + B, N, C = x.shape + qkv_bias = None + if self.q_bias is not 
None: + qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias)) + # qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) + qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) + qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) + q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) + + q = q * self.scale + attn = (q @ k.transpose(-2, -1)) + + if self.relative_position_bias_table is not None: + relative_position_bias = \ + self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1] + 1, + self.window_size[0] * self.window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH + relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + attn = attn + relative_position_bias.unsqueeze(0) + + if rel_pos_bias is not None: + attn = attn + rel_pos_bias + + attn = attn.softmax(dim=-1) + attn = self.attn_drop(attn) + + x = (attn @ v).transpose(1, 2).reshape(B, N, -1) + x = self.proj(x) + x = self.proj_drop(x) + return x + + +class Block(nn.Module): + + def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., + drop_path=0., init_values=None, act_layer=nn.GELU, norm_layer=nn.LayerNorm, + window_size=None, attn_head_dim=None): + super().__init__() + self.norm1 = norm_layer(dim) + self.attn = Attention( + dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, + attn_drop=attn_drop, proj_drop=drop, window_size=window_size, attn_head_dim=attn_head_dim) + # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here + self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() + self.norm2 = norm_layer(dim) + mlp_hidden_dim = int(dim * mlp_ratio) + self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) + + if init_values is not None and init_values > 0: + self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)),requires_grad=True) + self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)),requires_grad=True) + else: + self.gamma_1, self.gamma_2 = None, None + + def forward(self, x, rel_pos_bias=None): + if self.gamma_1 is None: + x = x + self.drop_path(self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)) + x = x + self.drop_path(self.mlp(self.norm2(x))) + else: + x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias)) + x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x))) + return x + + +class PatchEmbed(nn.Module): + """ Image to Patch Embedding + """ + def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): + super().__init__() + img_size = to_2tuple(img_size) + patch_size = to_2tuple(patch_size) + num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) + self.patch_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) + self.img_size = img_size + self.patch_size = patch_size + self.num_patches = num_patches + + self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) + + def forward(self, x, **kwargs): + B, C, H, W = x.shape + # FIXME look at relaxing size constraints + assert H == self.img_size[0] and W == self.img_size[1], \ + f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
+ x = self.proj(x).flatten(2).transpose(1, 2) + return x + + +class RelativePositionBias(nn.Module): + + def __init__(self, window_size, num_heads): + super().__init__() + self.window_size = window_size + self.num_relative_distance = (2 * window_size[0] - 1) * (2 * window_size[1] - 1) + 3 + self.relative_position_bias_table = nn.Parameter( + torch.zeros(self.num_relative_distance, num_heads)) # 2*Wh-1 * 2*Ww-1, nH + # cls to token & token 2 cls & cls to cls + + # get pair-wise relative position index for each token inside the window + coords_h = torch.arange(window_size[0]) + coords_w = torch.arange(window_size[1]) + coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww + coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww + relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww + relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 + relative_coords[:, :, 0] += window_size[0] - 1 # shift to start from 0 + relative_coords[:, :, 1] += window_size[1] - 1 + relative_coords[:, :, 0] *= 2 * window_size[1] - 1 + relative_position_index = \ + torch.zeros(size=(window_size[0] * window_size[1] + 1,) * 2, dtype=relative_coords.dtype) + relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww + relative_position_index[0, 0:] = self.num_relative_distance - 3 + relative_position_index[0:, 0] = self.num_relative_distance - 2 + relative_position_index[0, 0] = self.num_relative_distance - 1 + + self.register_buffer("relative_position_index", relative_position_index) + + # trunc_normal_(self.relative_position_bias_table, std=.02) + + def forward(self): + relative_position_bias = \ + self.relative_position_bias_table[self.relative_position_index.view(-1)].view( + self.window_size[0] * self.window_size[1] + 1, + self.window_size[0] * self.window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH + return relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww + + +class VisionTransformer(nn.Module): + """ Vision Transformer with support for patch or hybrid CNN input stage + """ + def __init__(self, img_size=224, patch_size=16, in_chans=3, num_classes=1000, embed_dim=768, depth=12, + num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., + drop_path_rate=0., norm_layer=nn.LayerNorm, init_values=None, + use_abs_pos_emb=True, use_rel_pos_bias=False, use_shared_rel_pos_bias=False, + use_mean_pooling=True, init_scale=0.001, pretrained_cfg=None, pretrained_cfg_overlay=None): + super().__init__() + self.num_classes = num_classes + self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models + + self.patch_embed = PatchEmbed( + img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) + num_patches = self.patch_embed.num_patches + + self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) + # self.mask_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) + if use_abs_pos_emb: + self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) + else: + self.pos_embed = None + self.pos_drop = nn.Dropout(p=drop_rate) + + if use_shared_rel_pos_bias: + self.rel_pos_bias = RelativePositionBias(window_size=self.patch_embed.patch_shape, num_heads=num_heads) + else: + self.rel_pos_bias = None + + dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule + self.use_rel_pos_bias = use_rel_pos_bias + self.blocks = nn.ModuleList([ + Block( + dim=embed_dim, num_heads=num_heads, 
mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, + drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, + init_values=init_values, window_size=self.patch_embed.patch_shape if use_rel_pos_bias else None) + for i in range(depth)]) + self.norm = nn.Identity() if use_mean_pooling else norm_layer(embed_dim) + self.fc_norm = norm_layer(embed_dim) if use_mean_pooling else None + self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + if self.pos_embed is not None: + trunc_normal_(self.pos_embed, std=.02) + trunc_normal_(self.cls_token, std=.02) + # trunc_normal_(self.mask_token, std=.02) + if isinstance(self.head, nn.Linear): + trunc_normal_(self.head.weight, std=.02) + self.apply(self._init_weights) + self.fix_init_weight() + + if isinstance(self.head, nn.Linear): + self.head.weight.data.mul_(init_scale) + self.head.bias.data.mul_(init_scale) + + def fix_init_weight(self): + def rescale(param, layer_id): + param.div_(math.sqrt(2.0 * layer_id)) + + for layer_id, layer in enumerate(self.blocks): + rescale(layer.attn.proj.weight.data, layer_id + 1) + rescale(layer.mlp.fc2.weight.data, layer_id + 1) + + def _init_weights(self, m): + if isinstance(m, nn.Linear): + trunc_normal_(m.weight, std=.02) + if isinstance(m, nn.Linear) and m.bias is not None: + nn.init.constant_(m.bias, 0) + elif isinstance(m, nn.LayerNorm): + nn.init.constant_(m.bias, 0) + nn.init.constant_(m.weight, 1.0) + + def get_num_layers(self): + return len(self.blocks) + + @torch.jit.ignore + def no_weight_decay(self): + return {'pos_embed', 'cls_token'} + + def get_classifier(self): + return self.head + + def reset_classifier(self, num_classes, global_pool=''): + self.num_classes = num_classes + self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity() + + def forward_features(self, x): + x = self.patch_embed(x) + batch_size, seq_len, _ = x.size() + + cls_tokens = self.cls_token.expand(batch_size, -1, -1) # stole cls_tokens impl from Phil Wang, thanks + x = torch.cat((cls_tokens, x), dim=1) + if self.pos_embed is not None: + x = x + self.pos_embed + x = self.pos_drop(x) + + rel_pos_bias = self.rel_pos_bias() if self.rel_pos_bias is not None else None + for blk in self.blocks: + x = blk(x, rel_pos_bias=rel_pos_bias) + + x = self.norm(x) + if self.fc_norm is not None: + t = x[:, 1:, :] + return self.fc_norm(t.mean(1)) + else: + return x[:, 0] + + def forward(self, x): + x = self.forward_features(x) + x = self.head(x) + return x + + def get_intermediate_layers(self, x): + x = self.patch_embed(x) + batch_size, seq_len, _ = x.size() + + cls_tokens = self.cls_token.expand(batch_size, -1, -1) # stole cls_tokens impl from Phil Wang, thanks + x = torch.cat((cls_tokens, x), dim=1) + if self.pos_embed is not None: + x = x + self.pos_embed + x = self.pos_drop(x) + + features = [] + rel_pos_bias = self.rel_pos_bias() if self.rel_pos_bias is not None else None + for blk in self.blocks: + x = blk(x, rel_pos_bias) + features.append(x) + + return features + + +@register_model +def beit_base_patch16_224(pretrained=False, **kwargs): + model = VisionTransformer( + patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) + model.default_cfg = _cfg() + return model + + +@register_model +def beit_base_patch16_384(pretrained=False, **kwargs): + model = VisionTransformer( + img_size=384, patch_size=16, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, qkv_bias=True, + 
norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) + model.default_cfg = _cfg() + return model + + +@register_model +def beit_large_patch16_224(pretrained=False, **kwargs): + model = VisionTransformer( + patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) + model.default_cfg = _cfg() + return model + + +@register_model +def beit_large_patch16_384(pretrained=False, **kwargs): + model = VisionTransformer( + img_size=384, patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) + model.default_cfg = _cfg() + return model + + +@register_model +def beit_large_patch16_512(pretrained=False, **kwargs): + model = VisionTransformer( + img_size=512, patch_size=16, embed_dim=1024, depth=24, num_heads=16, mlp_ratio=4, qkv_bias=True, + norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) + model.default_cfg = _cfg() + return model diff --git a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/main.py similarity index 97% rename from examples/onnxrt/image_recognition/beit/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/main.py index b7d9bc0eab0..11b3944336c 100644 --- a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/main.py +++ b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/main.py @@ -1,163 +1,163 @@ -import os -import tqdm -import onnx -import torch -import logging -import argparse -import onnxruntime as ort -from timm.utils import accuracy -from torchvision import datasets, transforms -from timm.data.constants import \ - IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD - -logger = logging.getLogger(__name__) -logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.WARN) - -def build_eval_transform(input_size=224, imagenet_default_mean_and_std=False, crop_pct=None): - resize_im = input_size > 32 - imagenet_default_mean_and_std = imagenet_default_mean_and_std - mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN - std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD - - t = [] - if resize_im: - if crop_pct is None: - if input_size < 384: - crop_pct = 224 / 256 - else: - crop_pct = 1.0 - size = int(input_size / crop_pct) - t.append( - transforms.Resize(size, interpolation=3), # to maintain same ratio w.r.t. 
224 images - ) - t.append(transforms.CenterCrop(input_size)) - - t.append(transforms.ToTensor()) - t.append(transforms.Normalize(mean, std)) - return transforms.Compose(t) - -def build_val_dataset(data_path): - transform = build_eval_transform() - root = os.path.join(data_path, 'val') - dataset = datasets.ImageFolder(root, transform=transform) - return dataset - - -def evaluate_func(data_loader, model): - session = ort.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"]) - top1, top5 = 0, 0 - - for idx, batch in tqdm.tqdm(enumerate(data_loader), desc='eval'): - images = batch[0].cpu().detach().numpy() - target = batch[-1] - output = session.run(None, {'image': images})[0] - acc1, acc5 = accuracy(torch.from_numpy(output), target, topk=(1, 5)) - top1 += acc1.cpu().detach().numpy() - top5 += acc5.cpu().detach().numpy() - - top1 = top1 / len(data_loader) - top5 = top5 / len(data_loader) - print('* Acc@1 {:.3f} Acc@5 {:.3f}'.format(top1, top5)) - return top1 - -if __name__ == '__main__': - logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") - parser = argparse.ArgumentParser( - description="BEiT quantization examples.", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - parser.add_argument( - '--model_path', - type=str, - help="Pre-trained model on onnx file" - ) - parser.add_argument( - '--dataset_location', - type=str, - help="Imagenet data path" - ) - parser.add_argument( - '--benchmark', - action='store_true', \ - default=False, - help="whether benchmark the model" - ) - parser.add_argument( - '--tune', - action='store_true', \ - default=False, - help="whether quantize the model" - ) - parser.add_argument( - '--output_model', - type=str, - help="output model path" - ) - parser.add_argument( - '--quant_format', - type=str, - default='default', - choices=['default', 'QDQ', 'QOperator'], - help="quantization format" - ) - parser.add_argument( - '--mode', - type=str, - help="benchmark mode of performance or accuracy" - ) - parser.add_argument( - "--batch_size", - default=64, - type=int, - ) - parser.add_argument( - "--num_workers", - default=10, - type=int, - ) - args = parser.parse_args() - - val_dataset = build_val_dataset(args.dataset_location) - val_sampler = torch.utils.data.SequentialSampler(val_dataset) - val_data_loader = torch.utils.data.DataLoader( - val_dataset, sampler=val_sampler, - batch_size=int(1.5 * args.batch_size), - num_workers=args.num_workers, - drop_last=False - ) - - def eval(model): - return evaluate_func(val_data_loader, model) - - model = onnx.load(args.model_path) - - if args.tune: - from neural_compressor import PostTrainingQuantConfig, quantization - from neural_compressor.utils.constant import FP32 - - config = PostTrainingQuantConfig(approach="static", - quant_format=args.quant_format, - op_type_dict={'Conv': FP32}, - op_name_dict={'/blocks.*/mlp/fc2/MatMul': FP32}, - recipes={'optypes_to_exclude_output_quant': ['MatMul']}, - ) - q_model = quantization.fit(model, - config, - calib_dataloader=val_data_loader, - eval_func=eval) - q_model.save(args.output_model) - - if args.benchmark: - if args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=1000, cores_per_instance=4, num_of_instance=1) - fit(model, conf, b_dataloader=val_data_loader) - elif args.mode == 'accuracy': - acc_result = evaluate_func(val_data_loader, model) - print("Batch size = %d" % val_data_loader.batch_size) - 
print("Accuracy: %.5f" % acc_result) - - +import os +import tqdm +import onnx +import torch +import logging +import argparse +import onnxruntime as ort +from timm.utils import accuracy +from torchvision import datasets, transforms +from timm.data.constants import \ + IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD + +logger = logging.getLogger(__name__) +logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.WARN) + +def build_eval_transform(input_size=224, imagenet_default_mean_and_std=False, crop_pct=None): + resize_im = input_size > 32 + imagenet_default_mean_and_std = imagenet_default_mean_and_std + mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN + std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD + + t = [] + if resize_im: + if crop_pct is None: + if input_size < 384: + crop_pct = 224 / 256 + else: + crop_pct = 1.0 + size = int(input_size / crop_pct) + t.append( + transforms.Resize(size, interpolation=3), # to maintain same ratio w.r.t. 224 images + ) + t.append(transforms.CenterCrop(input_size)) + + t.append(transforms.ToTensor()) + t.append(transforms.Normalize(mean, std)) + return transforms.Compose(t) + +def build_val_dataset(data_path): + transform = build_eval_transform() + root = os.path.join(data_path, 'val') + dataset = datasets.ImageFolder(root, transform=transform) + return dataset + + +def evaluate_func(data_loader, model): + session = ort.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"]) + top1, top5 = 0, 0 + + for idx, batch in tqdm.tqdm(enumerate(data_loader), desc='eval'): + images = batch[0].cpu().detach().numpy() + target = batch[-1] + output = session.run(None, {'image': images})[0] + acc1, acc5 = accuracy(torch.from_numpy(output), target, topk=(1, 5)) + top1 += acc1.cpu().detach().numpy() + top5 += acc5.cpu().detach().numpy() + + top1 = top1 / len(data_loader) + top5 = top5 / len(data_loader) + print('* Acc@1 {:.3f} Acc@5 {:.3f}'.format(top1, top5)) + return top1 + +if __name__ == '__main__': + logger.info("Evaluating ONNXRuntime full precision accuracy and performance:") + parser = argparse.ArgumentParser( + description="BEiT quantization examples.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument( + '--model_path', + type=str, + help="Pre-trained model on onnx file" + ) + parser.add_argument( + '--dataset_location', + type=str, + help="Imagenet data path" + ) + parser.add_argument( + '--benchmark', + action='store_true', \ + default=False, + help="whether benchmark the model" + ) + parser.add_argument( + '--tune', + action='store_true', \ + default=False, + help="whether quantize the model" + ) + parser.add_argument( + '--output_model', + type=str, + help="output model path" + ) + parser.add_argument( + '--quant_format', + type=str, + default='default', + choices=['default', 'QDQ', 'QOperator'], + help="quantization format" + ) + parser.add_argument( + '--mode', + type=str, + help="benchmark mode of performance or accuracy" + ) + parser.add_argument( + "--batch_size", + default=64, + type=int, + ) + parser.add_argument( + "--num_workers", + default=10, + type=int, + ) + args = parser.parse_args() + + val_dataset = build_val_dataset(args.dataset_location) + val_sampler = torch.utils.data.SequentialSampler(val_dataset) + val_data_loader = torch.utils.data.DataLoader( + val_dataset, 
sampler=val_sampler, + batch_size=int(1.5 * args.batch_size), + num_workers=args.num_workers, + drop_last=False + ) + + def eval(model): + return evaluate_func(val_data_loader, model) + + model = onnx.load(args.model_path) + + if args.tune: + from neural_compressor import PostTrainingQuantConfig, quantization + from neural_compressor.utils.constant import FP32 + + config = PostTrainingQuantConfig(approach="static", + quant_format=args.quant_format, + op_type_dict={'Conv': FP32}, + op_name_dict={'/blocks.*/mlp/fc2/MatMul': FP32}, + recipes={'optypes_to_exclude_output_quant': ['MatMul']}, + ) + q_model = quantization.fit(model, + config, + calib_dataloader=val_data_loader, + eval_func=eval) + q_model.save(args.output_model) + + if args.benchmark: + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=1000, cores_per_instance=4, num_of_instance=1) + fit(model, conf, b_dataloader=val_data_loader) + elif args.mode == 'accuracy': + acc_result = evaluate_func(val_data_loader, model) + print("Batch size = %d" % val_data_loader.batch_size) + print("Accuracy: %.5f" % acc_result) + + diff --git a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/prepare_model.py similarity index 96% rename from examples/onnxrt/image_recognition/beit/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/prepare_model.py index 59d6af78763..8cc54149f69 100644 --- a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/prepare_model.py +++ b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/prepare_model.py @@ -1,102 +1,102 @@ -import argparse -import os -import sys -import torch -from urllib import request -from timm.models import create_model -import beit_modeling_finetune - -MODEL_URLS = {"beit_base_patch16_224": "https://github.com/addf400/files/releases/download/v1.0/beit_base_patch16_224_pt22k_ft22kto1k.pth",} -MODEL_FILES = {"beit_base_patch16_224": "beit_base_patch16_224_pt22k_ft22kto1k.pth"} -MAX_TIMES_RETRY_DOWNLOAD = 5 - - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("--input_model", type=str, required=False, default="beit_base_patch16_224") - parser.add_argument("--output_model", type=str, required=True) - return parser.parse_args() - - -def progressbar(cur, total=100): - percent = '{:.2%}'.format(cur / total) - sys.stdout.write("\r[%-100s] %s" % ('#' * int(cur), percent)) - sys.stdout.flush() - - -def schedule(blocknum, blocksize, totalsize): - if totalsize == 0: - percent = 0 - else: - percent = min(1.0, blocknum * blocksize / totalsize) * 100 - progressbar(percent) - - -def download_model(input_model, retry_times=5): - model_url = MODEL_URLS[input_model] - model_file = MODEL_FILES[input_model] - if os.path.isfile(model_file): - print(f"{model_file} exists, skip download") - return True - - print("download model...") - retries = 0 - while retries < retry_times: - try: - request.urlretrieve(model_url, model_file, schedule) - break - except KeyboardInterrupt: - return False - except: - retries += 1 - print(f"Download failed{', Retry downloading...' 
if retries < retry_times else '!'}") - return retries < retry_times - - -def export_model(input_model, output_model): - print("\nexport model...") - - model = create_model( - input_model, - pretrained=False, - num_classes=1000, - drop_rate=0.0, - drop_path_rate=0.1, - attn_drop_rate=0.0, - drop_block_rate=None, - use_mean_pooling=True, - init_scale=0.001, - use_rel_pos_bias=True, - use_abs_pos_emb=False, - init_values=0.1, - ) - - checkpoint = torch.load(MODEL_FILES[input_model], map_location='cpu') - model.load_state_dict(checkpoint['model']) - print("Resume checkpoint %s" % MODEL_FILES[input_model]) - - model.eval() - x = torch.randn(1, 3, 224, 224, requires_grad=True) - torch.onnx.export(model, - x, - output_model, - export_params=True, - opset_version=13, - do_constant_folding=True, - input_names = ["image"], - output_names = ["output"], - dynamic_axes={"image" : {0 : "batch_size"}, - "output" : {0 : "batch_size"}} - ) - assert os.path.exists(output_model), f"Export failed! {output_model} doesn't exist!" - - -def prepare_model(input_model, output_model): - is_download_successful = download_model(args.input_model, MAX_TIMES_RETRY_DOWNLOAD) - if is_download_successful: - export_model(input_model, output_model) - - -if __name__ == "__main__": - args = parse_arguments() - prepare_model(args.input_model, args.output_model) +import argparse +import os +import sys +import torch +from urllib import request +from timm.models import create_model +import beit_modeling_finetune + +MODEL_URLS = {"beit_base_patch16_224": "https://github.com/addf400/files/releases/download/v1.0/beit_base_patch16_224_pt22k_ft22kto1k.pth",} +MODEL_FILES = {"beit_base_patch16_224": "beit_base_patch16_224_pt22k_ft22kto1k.pth"} +MAX_TIMES_RETRY_DOWNLOAD = 5 + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", type=str, required=False, default="beit_base_patch16_224") + parser.add_argument("--output_model", type=str, required=True) + return parser.parse_args() + + +def progressbar(cur, total=100): + percent = '{:.2%}'.format(cur / total) + sys.stdout.write("\r[%-100s] %s" % ('#' * int(cur), percent)) + sys.stdout.flush() + + +def schedule(blocknum, blocksize, totalsize): + if totalsize == 0: + percent = 0 + else: + percent = min(1.0, blocknum * blocksize / totalsize) * 100 + progressbar(percent) + + +def download_model(input_model, retry_times=5): + model_url = MODEL_URLS[input_model] + model_file = MODEL_FILES[input_model] + if os.path.isfile(model_file): + print(f"{model_file} exists, skip download") + return True + + print("download model...") + retries = 0 + while retries < retry_times: + try: + request.urlretrieve(model_url, model_file, schedule) + break + except KeyboardInterrupt: + return False + except: + retries += 1 + print(f"Download failed{', Retry downloading...' 
if retries < retry_times else '!'}") + return retries < retry_times + + +def export_model(input_model, output_model): + print("\nexport model...") + + model = create_model( + input_model, + pretrained=False, + num_classes=1000, + drop_rate=0.0, + drop_path_rate=0.1, + attn_drop_rate=0.0, + drop_block_rate=None, + use_mean_pooling=True, + init_scale=0.001, + use_rel_pos_bias=True, + use_abs_pos_emb=False, + init_values=0.1, + ) + + checkpoint = torch.load(MODEL_FILES[input_model], map_location='cpu') + model.load_state_dict(checkpoint['model']) + print("Resume checkpoint %s" % MODEL_FILES[input_model]) + + model.eval() + x = torch.randn(1, 3, 224, 224, requires_grad=True) + torch.onnx.export(model, + x, + output_model, + export_params=True, + opset_version=13, + do_constant_folding=True, + input_names = ["image"], + output_names = ["output"], + dynamic_axes={"image" : {0 : "batch_size"}, + "output" : {0 : "batch_size"}} + ) + assert os.path.exists(output_model), f"Export failed! {output_model} doesn't exist!" + + +def prepare_model(input_model, output_model): + is_download_successful = download_model(args.input_model, MAX_TIMES_RETRY_DOWNLOAD) + if is_download_successful: + export_model(input_model, output_model) + + +if __name__ == "__main__": + args = parse_arguments() + prepare_model(args.input_model, args.output_model) diff --git a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/requirements.txt similarity index 94% rename from examples/onnxrt/image_recognition/beit/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/requirements.txt index b6855b21b0a..9d9d5ee0915 100644 --- a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/requirements.txt +++ b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/requirements.txt @@ -1,6 +1,6 @@ -torch -torchvision -timm -onnx -onnxruntime +torch +torchvision +timm +onnx +onnxruntime onnxruntime-extensions; python_version < '3.11' \ No newline at end of file diff --git a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/beit/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/beit/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/beit/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/beit/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/main.py similarity index 100% rename 
from examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v2/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/requirements.txt rename to 
examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/mobilenet_v3/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/run_quant.sh similarity index 100% rename from 
examples/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/alexnet/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/caffenet/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/main.py 
b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/run_benchmark.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/run_quant.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/densenet/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/prepare_model.py similarity index 100% rename from 
examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/efficientnet/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/requirements.txt diff --git 
a/examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/fcn/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/run_quant.sh similarity index 100% rename from 
examples/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/googlenet/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/inception/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/README.md diff --git 
a/examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/run_benchmark.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/run_quant.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mnist/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/prepare_model.py similarity index 100% rename from 
examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/run_benchmark.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/run_quant.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/mobilenet/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/requirements.txt rename to 
examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/resnet50/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/run_quant.sh 
b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/shufflenet/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/squeezenet/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/README.md rename to 
examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/vgg16/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/prepare_model.py similarity index 100% rename from 
examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/onnx_model_zoo/zfnet/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from 
examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/resnet50_mlperf/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/resnet50_torchvision/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/README.md similarity index 100% rename from 
examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/README.md diff --git a/examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/main.py diff --git a/examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/image_recognition/vgg16/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/image_recognition/vgg16/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/main.py similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/main.py diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_data.sh b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_data.sh similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_data.sh rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_data.sh diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/requirements.txt 
b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/main.py diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_data.sh b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/prepare_data.sh similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_data.sh rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/prepare_data.sh diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/bert/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/bert/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/bert/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/README.md rename to 
examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/main.py similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/main.py diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_data.sh b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_data.sh similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_data.sh rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_data.sh diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/main.py diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_data.sh b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_data.sh similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_data.sh rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_data.sh diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_model.py similarity index 100% rename from 
examples/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/distilbert/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/distilbert/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/main.py index 5182ec6bbc5..98ff6d044e3 100644 --- a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/main.py @@ -1,263 +1,263 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Fine-tuning the library models for language modeling on a text file (GPT, GPT-2, BERT, RoBERTa). -GPT and GPT-2 are fine-tuned using a causal language modeling (CLM) loss while BERT and RoBERTa are fine-tuned -using a masked language modeling (MLM) loss. 
-""" - -from __future__ import absolute_import, division, print_function - -import argparse -import logging -import os -import pickle -import numpy as np -import torch -from torch.utils.data import DataLoader, Dataset, SequentialSampler -import onnx -import onnxruntime as ort -from torch.nn import CrossEntropyLoss -from tqdm import tqdm -from transformers import GPT2Tokenizer - -logger = logging.getLogger(__name__) - - -class TextDataset(Dataset): - def __init__(self, tokenizer, args, file_path='train', block_size=1024): - assert os.path.isfile(file_path) - directory, filename = os.path.split(file_path) - if not os.path.exists("./dataset_cached"): - os.makedirs("./dataset_cached") - cached_features_file = os.path.join("./dataset_cached", - args.model_name_or_path.split('/')[-1] + '_cached_lm_' + str(block_size) + '_' + filename) - - if os.path.exists(cached_features_file) and not args.overwrite_cache: - logger.info("Loading features from cached file %s", cached_features_file) - with open(cached_features_file, 'rb') as handle: - self.examples = pickle.load(handle) - else: - logger.info("Creating features from dataset file at %s", directory) - - self.examples = [] - with open(file_path, encoding="utf-8") as f: - text = f.read() - - tokenized_text = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text)) - - for i in range(0, len(tokenized_text)-block_size+1, block_size): # Truncate in block of block_size - self.examples.append(tokenizer.build_inputs_with_special_tokens(tokenized_text[i:i+block_size])) - # Note that we are losing the last truncated example here for the sake of simplicity (no padding) - # If your dataset is small, first you should loook for a bigger one :-) and second you - # can change this behavior by adding (model specific) padding. - - logger.info("Saving features into cached file %s", cached_features_file) - with open(cached_features_file, 'wb') as handle: - pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL) - - def __len__(self): - return len(self.examples) - - def __getitem__(self, item): - inputs = torch.tensor(self.examples[item]) - inputs = np.array(inputs) - inputs = np.expand_dims(inputs, axis=0) - return inputs, inputs - -def load_and_cache_examples(args, tokenizer, evaluate=False): - dataset = TextDataset(tokenizer, args, file_path=args.data_path, block_size=args.block_size) - return dataset - -def evaluate(args, model, tokenizer, prefix=""): - eval_dataset = load_and_cache_examples(args, tokenizer, evaluate=True) - - eval_sampler = SequentialSampler(eval_dataset) - eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) - - # Eval! 
- logger.info("***** Running evaluation {} *****".format(prefix)) - logger.info(" Num examples = %d", len(eval_dataset)) - logger.info(" Batch size = %d", args.eval_batch_size) - eval_loss = 0.0 - nb_eval_steps = 0 - import timeit - total_time = 0.0 - - options = ort.SessionOptions() - session = ort.InferenceSession(model.SerializeToString(), options, - providers=ort.get_available_providers()) - len_outputs = len(session.get_outputs()) - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - ort_inputs = {} - - for idx, (inputs, labels) in enumerate(tqdm(eval_dataloader, desc="Evaluating")): - if nb_eval_steps >= args.warmup_steps: - start = timeit.default_timer() - inputs = inputs.to(args.device) - labels = labels.to(args.device) - for i in range(len_inputs): - inputs = np.array(inputs) - ort_inputs.update({inputs_names[i]: inputs}) - predictions = session.run(None, ort_inputs) - lm_logits = predictions[0] - lm_logits = torch.from_numpy(lm_logits) - # Shift so that tokens < n predict n - shift_logits = lm_logits[..., :-1, :].contiguous() - shift_labels = labels[..., 1:].contiguous() - # Flatten the tokens - loss_fct = CrossEntropyLoss(ignore_index=-1) - lm_loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), - shift_labels.view(-1)) - - if nb_eval_steps >= args.warmup_steps: - total_time += (timeit.default_timer() - start) - eval_loss += lm_loss.mean().item() - nb_eval_steps += 1 - - if args.iter > 0 and nb_eval_steps > (args.warmup_steps + args.iter): - break - - if nb_eval_steps >= args.warmup_steps: - perf = (nb_eval_steps - args.warmup_steps) * args.eval_batch_size / total_time - if args.eval_batch_size == 1: - print('Latency: %.3f ms' % (total_time / (nb_eval_steps - args.warmup_steps) * 1000)) - print("Throughput: {} samples/s".format(perf)) - else: - logger.info("*****no performance, please check dataset length and warmup number *****") - - eval_loss = eval_loss / nb_eval_steps - perplexity = torch.exp(torch.tensor(eval_loss)) - - result = { - "perplexity": perplexity - } - logger.info("***** Eval results {} *****".format(prefix)) - for key in sorted(result.keys()): - logger.info(" %s = %s", key, str(result[key])) - - if args.benchmark and args.mode == "accuracy": - print("Batch size = %d" % args.eval_batch_size) - print("Accuracy: %.5f" % (result['perplexity'].item())) - - return result['perplexity'].item() - -def main(): - parser = argparse.ArgumentParser() - - parser.add_argument('--model_path', type=str, required=True, - help='Pre-trained bert model onnx file.') - parser.add_argument("--data_path", type=str, required=True, - help="Input evaluation data file to evaluate the perplexity on (a text file).") - parser.add_argument("--model_name_or_path", type=str, - help="The model checkpoint for weights initialization.") - parser.add_argument("--cache_dir", default="", type=str, - help="Optional directory to store the pre-trained models downloaded from s3 (instead of the default one)") - parser.add_argument("--block_size", default=1024, type=int, - help="Optional input sequence length after tokenization." - "The training dataset will be truncated in block of this size for training." 
- "Default to the model max input length for single sentence inputs (take into account special tokens).") - parser.add_argument("--eval_batch_size", default=1, type=int, - help="Batch size for evaluation.") - parser.add_argument('--overwrite_cache', action='store_true', - help="Overwrite the cached training and evaluation sets") - parser.add_argument('--tune',action='store_true', default=False, - help='Get bert tuning quantization model with neural_compressor.') - parser.add_argument('--output_model',type=str, default='gpt2_tune.onnx', - help='output model path and name') - parser.add_argument('--benchmark',action='store_true', default=False, - help='Get benchmark performance of quantized model.') - parser.add_argument('--mode', type=str, - help="benchmark mode of performance or accuracy") - parser.add_argument("--warmup_steps", default=10, type=int, - help="Linear warmup over warmup_steps.") - parser.add_argument('-i', "--iter", default=0, type=int, - help='For accuracy measurement only.') - args = parser.parse_args() - args.device = torch.device("cpu") - - # Setup logging - logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.INFO) - - tokenizer = GPT2Tokenizer.from_pretrained(args.model_name_or_path, - use_fast=True, - cache_dir=args.cache_dir if args.cache_dir else None) - if args.block_size <= 0: - args.block_size = tokenizer.max_len_single_sentence # Our input block size will be the max possible for the model - args.block_size = min(args.block_size, tokenizer.max_len_single_sentence) - - logger.info("Training/evaluation parameters %s", args) - - ds = load_and_cache_examples(args, tokenizer, evaluate=True) - - def eval_func(model): - return evaluate(args, model, tokenizer) - - if args.benchmark: - model = onnx.load(args.model_path) - if args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(iteration=100, - cores_per_instance=4, - num_of_instance=1) - b_dataloader = DataLoader(ds, args.eval_batch_size) - fit(model, conf, b_dataloader=b_dataloader) - else: - evaluate(args, model, tokenizer) - - if args.tune: - # optimize model - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - opt_options = FusionOptions('gpt2') - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - args.model_path, - 'gpt2', - num_heads=12, - hidden_size=768, - optimization_options=opt_options) - model = model_optimizer.model - - # check the optimized model is valid - try: - ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - except Exception as e: - logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. 
" \ - "Try to upgrade onnxruntime to avoid this error") - model = onnx.load(args.model_path) - - from neural_compressor import quantization, PostTrainingQuantConfig - from neural_compressor.config import AccuracyCriterion - accuracy_criterion = AccuracyCriterion() - accuracy_criterion.higher_is_better = False - accuracy_criterion.relative = 0.05 - config = PostTrainingQuantConfig(approach='dynamic', - accuracy_criterion=accuracy_criterion) - q_model = quantization.fit(model, - config, - eval_func=eval_func) - q_model.save(args.output_model) - - -if __name__ == "__main__": - main() +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the library models for language modeling on a text file (GPT, GPT-2, BERT, RoBERTa). +GPT and GPT-2 are fine-tuned using a causal language modeling (CLM) loss while BERT and RoBERTa are fine-tuned +using a masked language modeling (MLM) loss. +""" + +from __future__ import absolute_import, division, print_function + +import argparse +import logging +import os +import pickle +import numpy as np +import torch +from torch.utils.data import DataLoader, Dataset, SequentialSampler +import onnx +import onnxruntime as ort +from torch.nn import CrossEntropyLoss +from tqdm import tqdm +from transformers import GPT2Tokenizer + +logger = logging.getLogger(__name__) + + +class TextDataset(Dataset): + def __init__(self, tokenizer, args, file_path='train', block_size=1024): + assert os.path.isfile(file_path) + directory, filename = os.path.split(file_path) + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", + args.model_name_or_path.split('/')[-1] + '_cached_lm_' + str(block_size) + '_' + filename) + + if os.path.exists(cached_features_file) and not args.overwrite_cache: + logger.info("Loading features from cached file %s", cached_features_file) + with open(cached_features_file, 'rb') as handle: + self.examples = pickle.load(handle) + else: + logger.info("Creating features from dataset file at %s", directory) + + self.examples = [] + with open(file_path, encoding="utf-8") as f: + text = f.read() + + tokenized_text = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text)) + + for i in range(0, len(tokenized_text)-block_size+1, block_size): # Truncate in block of block_size + self.examples.append(tokenizer.build_inputs_with_special_tokens(tokenized_text[i:i+block_size])) + # Note that we are losing the last truncated example here for the sake of simplicity (no padding) + # If your dataset is small, first you should loook for a bigger one :-) and second you + # can change this behavior by adding (model specific) padding. 
+ + logger.info("Saving features into cached file %s", cached_features_file) + with open(cached_features_file, 'wb') as handle: + pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL) + + def __len__(self): + return len(self.examples) + + def __getitem__(self, item): + inputs = torch.tensor(self.examples[item]) + inputs = np.array(inputs) + inputs = np.expand_dims(inputs, axis=0) + return inputs, inputs + +def load_and_cache_examples(args, tokenizer, evaluate=False): + dataset = TextDataset(tokenizer, args, file_path=args.data_path, block_size=args.block_size) + return dataset + +def evaluate(args, model, tokenizer, prefix=""): + eval_dataset = load_and_cache_examples(args, tokenizer, evaluate=True) + + eval_sampler = SequentialSampler(eval_dataset) + eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) + + # Eval! + logger.info("***** Running evaluation {} *****".format(prefix)) + logger.info(" Num examples = %d", len(eval_dataset)) + logger.info(" Batch size = %d", args.eval_batch_size) + eval_loss = 0.0 + nb_eval_steps = 0 + import timeit + total_time = 0.0 + + options = ort.SessionOptions() + session = ort.InferenceSession(model.SerializeToString(), options, + providers=ort.get_available_providers()) + len_outputs = len(session.get_outputs()) + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + ort_inputs = {} + + for idx, (inputs, labels) in enumerate(tqdm(eval_dataloader, desc="Evaluating")): + if nb_eval_steps >= args.warmup_steps: + start = timeit.default_timer() + inputs = inputs.to(args.device) + labels = labels.to(args.device) + for i in range(len_inputs): + inputs = np.array(inputs) + ort_inputs.update({inputs_names[i]: inputs}) + predictions = session.run(None, ort_inputs) + lm_logits = predictions[0] + lm_logits = torch.from_numpy(lm_logits) + # Shift so that tokens < n predict n + shift_logits = lm_logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss(ignore_index=-1) + lm_loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), + shift_labels.view(-1)) + + if nb_eval_steps >= args.warmup_steps: + total_time += (timeit.default_timer() - start) + eval_loss += lm_loss.mean().item() + nb_eval_steps += 1 + + if args.iter > 0 and nb_eval_steps > (args.warmup_steps + args.iter): + break + + if nb_eval_steps >= args.warmup_steps: + perf = (nb_eval_steps - args.warmup_steps) * args.eval_batch_size / total_time + if args.eval_batch_size == 1: + print('Latency: %.3f ms' % (total_time / (nb_eval_steps - args.warmup_steps) * 1000)) + print("Throughput: {} samples/s".format(perf)) + else: + logger.info("*****no performance, please check dataset length and warmup number *****") + + eval_loss = eval_loss / nb_eval_steps + perplexity = torch.exp(torch.tensor(eval_loss)) + + result = { + "perplexity": perplexity + } + logger.info("***** Eval results {} *****".format(prefix)) + for key in sorted(result.keys()): + logger.info(" %s = %s", key, str(result[key])) + + if args.benchmark and args.mode == "accuracy": + print("Batch size = %d" % args.eval_batch_size) + print("Accuracy: %.5f" % (result['perplexity'].item())) + + return result['perplexity'].item() + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument('--model_path', type=str, required=True, + help='Pre-trained bert model onnx file.') + parser.add_argument("--data_path", type=str, required=True, + help="Input 
evaluation data file to evaluate the perplexity on (a text file).") + parser.add_argument("--model_name_or_path", type=str, + help="The model checkpoint for weights initialization.") + parser.add_argument("--cache_dir", default="", type=str, + help="Optional directory to store the pre-trained models downloaded from s3 (instead of the default one)") + parser.add_argument("--block_size", default=1024, type=int, + help="Optional input sequence length after tokenization." + "The training dataset will be truncated in block of this size for training." + "Default to the model max input length for single sentence inputs (take into account special tokens).") + parser.add_argument("--eval_batch_size", default=1, type=int, + help="Batch size for evaluation.") + parser.add_argument('--overwrite_cache', action='store_true', + help="Overwrite the cached training and evaluation sets") + parser.add_argument('--tune',action='store_true', default=False, + help='Get bert tuning quantization model with neural_compressor.') + parser.add_argument('--output_model',type=str, default='gpt2_tune.onnx', + help='output model path and name') + parser.add_argument('--benchmark',action='store_true', default=False, + help='Get benchmark performance of quantized model.') + parser.add_argument('--mode', type=str, + help="benchmark mode of performance or accuracy") + parser.add_argument("--warmup_steps", default=10, type=int, + help="Linear warmup over warmup_steps.") + parser.add_argument('-i', "--iter", default=0, type=int, + help='For accuracy measurement only.') + args = parser.parse_args() + args.device = torch.device("cpu") + + # Setup logging + logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.INFO) + + tokenizer = GPT2Tokenizer.from_pretrained(args.model_name_or_path, + use_fast=True, + cache_dir=args.cache_dir if args.cache_dir else None) + if args.block_size <= 0: + args.block_size = tokenizer.max_len_single_sentence # Our input block size will be the max possible for the model + args.block_size = min(args.block_size, tokenizer.max_len_single_sentence) + + logger.info("Training/evaluation parameters %s", args) + + ds = load_and_cache_examples(args, tokenizer, evaluate=True) + + def eval_func(model): + return evaluate(args, model, tokenizer) + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + b_dataloader = DataLoader(ds, args.eval_batch_size) + fit(model, conf, b_dataloader=b_dataloader) + else: + evaluate(args, model, tokenizer) + + if args.tune: + # optimize model + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('gpt2') + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + 'gpt2', + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + model = model_optimizer.model + + # check the optimized model is valid + try: + ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + except Exception as e: + logger.warning("Optimized model is invalid: {}. ".format(e)) + logger.warning("Model optimizer will be skipped. 
" \ + "Try to upgrade onnxruntime to avoid this error") + model = onnx.load(args.model_path) + + from neural_compressor import quantization, PostTrainingQuantConfig + from neural_compressor.config import AccuracyCriterion + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.higher_is_better = False + accuracy_criterion.relative = 0.05 + config = PostTrainingQuantConfig(approach='dynamic', + accuracy_criterion=accuracy_criterion) + q_model = quantization.fit(model, + config, + eval_func=eval_func) + q_model.save(args.output_model) + + +if __name__ == "__main__": + main() diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/prepare_model.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/prepare_model.py index 5e07fdc3462..d2025fa24e6 100644 --- a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/prepare_model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/prepare_model.py @@ -1,82 +1,82 @@ -import argparse -import os - -import numpy as np -import torch -from transformers import GPT2LMHeadModel, GPT2Tokenizer - -MODELS = { - 'gpt2':(GPT2LMHeadModel, GPT2Tokenizer, 'gpt2'), - 'distilgpt2': (GPT2LMHeadModel, GPT2Tokenizer, 'distilgpt2') -} -data_dir = 'test_data_set_0' - - -def flatten(inputs): - return [[flatten(i) for i in inputs] if isinstance(inputs, (list, tuple)) else inputs] - - -def update_flatten_list(inputs, res_list): - for i in inputs: - res_list.append(i) if not isinstance(i, (list, tuple)) else update_flatten_list(i, res_list) - return res_list - - -def save_model(name, model, inputs, outputs, input_names=None, output_names=None, **kwargs): - if hasattr(model, 'train'): - model.train(False) - - inputs_flatten = flatten(inputs) - inputs_flatten = update_flatten_list(inputs_flatten, []) - outputs_flatten = flatten(outputs) - outputs_flatten = update_flatten_list(outputs_flatten, []) - if input_names is None: - input_names = [] - for i, _ in enumerate(inputs_flatten): - input_names.append('input' + str(i+1)) - else: - np.testing.assert_equal(len(input_names), len(inputs_flatten), - "Number of input names provided is not equal to the number of inputs.") - - if output_names is None: - output_names = [] - for i, _ in enumerate(outputs_flatten): - output_names.append('output' + str(i+1)) - else: - np.testing.assert_equal(len(output_names), len(outputs_flatten), - "Number of output names provided is not equal to the number of output.") - - if isinstance(model, torch.jit.ScriptModule): - torch.onnx._export(model, inputs, name, verbose=True, input_names=input_names, - output_names=output_names, **kwargs) - else: - torch.onnx.export(model, inputs, name, verbose=True, input_names=input_names, - output_names=output_names, **kwargs) - - assert os.path.exists(name), f"Export failed, {name} doesn't exist!" 
- - -def gpt2_test(): - parser = argparse.ArgumentParser() - - parser.add_argument('--input_model', type=str, required=False, default="gpt2") - parser.add_argument('--output_model', type=str, required=True, - help='model name or path.') - args = parser.parse_args() - - model_class, tokenizer_class, pretrained_weights = MODELS[args.input_model] - # Load pretrained model/tokenizer - tokenizer = tokenizer_class.from_pretrained(pretrained_weights) - model = model_class.from_pretrained(pretrained_weights) - model.eval() - # Encode text - # Add special tokens takes care of adding [CLS], [SEP], ... tokens in the right way for each model. - input_ids_1 = torch.tensor( - [[tokenizer.encode("Here is some text to encode Hello World", add_special_tokens=True)]]) - with torch.no_grad(): - output_1 = model(input_ids_1) # Models outputs are now tuples - - save_model(args.output_model, model.cpu(), input_ids_1, output_1, opset_version=14, input_names=['input1'], dynamic_axes={'input1': [0, 1, 2]}) - - -gpt2_test() +import argparse +import os + +import numpy as np +import torch +from transformers import GPT2LMHeadModel, GPT2Tokenizer + +MODELS = { + 'gpt2':(GPT2LMHeadModel, GPT2Tokenizer, 'gpt2'), + 'distilgpt2': (GPT2LMHeadModel, GPT2Tokenizer, 'distilgpt2') +} +data_dir = 'test_data_set_0' + + +def flatten(inputs): + return [[flatten(i) for i in inputs] if isinstance(inputs, (list, tuple)) else inputs] + + +def update_flatten_list(inputs, res_list): + for i in inputs: + res_list.append(i) if not isinstance(i, (list, tuple)) else update_flatten_list(i, res_list) + return res_list + + +def save_model(name, model, inputs, outputs, input_names=None, output_names=None, **kwargs): + if hasattr(model, 'train'): + model.train(False) + + inputs_flatten = flatten(inputs) + inputs_flatten = update_flatten_list(inputs_flatten, []) + outputs_flatten = flatten(outputs) + outputs_flatten = update_flatten_list(outputs_flatten, []) + if input_names is None: + input_names = [] + for i, _ in enumerate(inputs_flatten): + input_names.append('input' + str(i+1)) + else: + np.testing.assert_equal(len(input_names), len(inputs_flatten), + "Number of input names provided is not equal to the number of inputs.") + + if output_names is None: + output_names = [] + for i, _ in enumerate(outputs_flatten): + output_names.append('output' + str(i+1)) + else: + np.testing.assert_equal(len(output_names), len(outputs_flatten), + "Number of output names provided is not equal to the number of output.") + + if isinstance(model, torch.jit.ScriptModule): + torch.onnx._export(model, inputs, name, verbose=True, input_names=input_names, + output_names=output_names, **kwargs) + else: + torch.onnx.export(model, inputs, name, verbose=True, input_names=input_names, + output_names=output_names, **kwargs) + + assert os.path.exists(name), f"Export failed, {name} doesn't exist!" + + +def gpt2_test(): + parser = argparse.ArgumentParser() + + parser.add_argument('--input_model', type=str, required=False, default="gpt2") + parser.add_argument('--output_model', type=str, required=True, + help='model name or path.') + args = parser.parse_args() + + model_class, tokenizer_class, pretrained_weights = MODELS[args.input_model] + # Load pretrained model/tokenizer + tokenizer = tokenizer_class.from_pretrained(pretrained_weights) + model = model_class.from_pretrained(pretrained_weights) + model.eval() + # Encode text + # Add special tokens takes care of adding [CLS], [SEP], ... tokens in the right way for each model. 
+ input_ids_1 = torch.tensor( + [[tokenizer.encode("Here is some text to encode Hello World", add_special_tokens=True)]]) + with torch.no_grad(): + output_1 = model(input_ids_1) # Models outputs are now tuples + + save_model(args.output_model, model.cpu(), input_ids_1, output_1, opset_version=14, input_names=['input1'], dynamic_axes={'input1': [0, 1, 2]}) + + +gpt2_test() diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/requirements.txt similarity index 94% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/requirements.txt index 77e932f112b..30bc5148c17 100644 --- a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/requirements.txt @@ -1,8 +1,8 @@ -transformers -accelerate -onnx -onnxruntime -coloredlogs -torch -sympy +transformers +accelerate +onnx +onnxruntime +coloredlogs +torch +sympy onnxruntime-extensions; python_version < '3.11' \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/main.py index 8229f34894d..7b5169968c1 100644 --- a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/main.py @@ -1,272 +1,272 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. -# Copyright (c) 2018, NVIDIA CORPORATION. 
All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Fine-tuning the library models for language modeling on a text file (GPT, GPT-2, BERT, RoBERTa). -GPT and GPT-2 are fine-tuned using a causal language modeling (CLM) loss while BERT and RoBERTa are fine-tuned -using a masked language modeling (MLM) loss. -""" - -from __future__ import absolute_import, division, print_function - -import argparse -import logging -import os -import pickle -import numpy as np -import torch -from torch.utils.data import DataLoader, Dataset, SequentialSampler -import onnx -import onnxruntime as ort -from torch.nn import CrossEntropyLoss -from tqdm import tqdm -from transformers import GPT2Tokenizer - -logger = logging.getLogger(__name__) - - -class TextDataset(Dataset): - def __init__(self, tokenizer, args, file_path='train', block_size=1024): - assert os.path.isfile(file_path) - directory, filename = os.path.split(file_path) - if not os.path.exists("./dataset_cached"): - os.makedirs("./dataset_cached") - cached_features_file = os.path.join("./dataset_cached", - args.model_name_or_path.split('/')[-1] + '_cached_lm_' + str(block_size) + '_' + filename) - - if os.path.exists(cached_features_file) and not args.overwrite_cache: - logger.info("Loading features from cached file %s", cached_features_file) - with open(cached_features_file, 'rb') as handle: - self.examples = pickle.load(handle) - else: - logger.info("Creating features from dataset file at %s", directory) - - self.examples = [] - with open(file_path, encoding="utf-8") as f: - text = f.read() - - tokenized_text = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text)) - - for i in range(0, len(tokenized_text)-block_size+1, block_size): # Truncate in block of block_size - self.examples.append(tokenizer.build_inputs_with_special_tokens(tokenized_text[i:i+block_size])) - # Note that we are losing the last truncated example here for the sake of simplicity (no padding) - # If your dataset is small, first you should loook for a bigger one :-) and second you - # can change this behavior by adding (model specific) padding. - - logger.info("Saving features into cached file %s", cached_features_file) - with open(cached_features_file, 'wb') as handle: - pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL) - - def __len__(self): - return len(self.examples) - - def __getitem__(self, item): - inputs = torch.tensor(self.examples[item]) - inputs = np.array(inputs) - inputs = np.expand_dims(inputs, axis=0) - return inputs, inputs - -def load_and_cache_examples(args, tokenizer, evaluate=False): - dataset = TextDataset(tokenizer, args, file_path=args.data_path, block_size=args.block_size) - return dataset - -def evaluate(args, model, tokenizer, prefix=""): - eval_dataset = load_and_cache_examples(args, tokenizer, evaluate=True) - - eval_sampler = SequentialSampler(eval_dataset) - eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) - - # Eval! 
- logger.info("***** Running evaluation {} *****".format(prefix)) - logger.info(" Num examples = %d", len(eval_dataset)) - logger.info(" Batch size = %d", args.eval_batch_size) - eval_loss = 0.0 - nb_eval_steps = 0 - import timeit - total_time = 0.0 - - options = ort.SessionOptions() - session = ort.InferenceSession(model.SerializeToString(), options, - providers=ort.get_available_providers()) - len_outputs = len(session.get_outputs()) - len_inputs = len(session.get_inputs()) - inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] - ort_inputs = {} - - for idx, (inputs, labels) in enumerate(tqdm(eval_dataloader, desc="Evaluating")): - if nb_eval_steps >= args.warmup_steps: - start = timeit.default_timer() - inputs = inputs.to(args.device) - labels = labels.to(args.device) - for i in range(len_inputs): - inputs = np.array(inputs) - ort_inputs.update({inputs_names[i]: inputs}) - predictions = session.run(None, ort_inputs) - lm_logits = predictions[0] - lm_logits = torch.from_numpy(lm_logits) - # Shift so that tokens < n predict n - shift_logits = lm_logits[..., :-1, :].contiguous() - shift_labels = labels[..., 1:].contiguous() - # Flatten the tokens - loss_fct = CrossEntropyLoss(ignore_index=-1) - lm_loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), - shift_labels.view(-1)) - - if nb_eval_steps >= args.warmup_steps: - total_time += (timeit.default_timer() - start) - eval_loss += lm_loss.mean().item() - nb_eval_steps += 1 - - if args.iter > 0 and nb_eval_steps > (args.warmup_steps + args.iter): - break - - if nb_eval_steps >= args.warmup_steps: - perf = (nb_eval_steps - args.warmup_steps) * args.eval_batch_size / total_time - if args.eval_batch_size == 1: - print('Latency: %.3f ms' % (total_time / (nb_eval_steps - args.warmup_steps) * 1000)) - print("Throughput: {} samples/s".format(perf)) - else: - logger.info("*****no performance, please check dataset length and warmup number *****") - - eval_loss = eval_loss / nb_eval_steps - perplexity = torch.exp(torch.tensor(eval_loss)) - - result = { - "perplexity": perplexity - } - logger.info("***** Eval results {} *****".format(prefix)) - for key in sorted(result.keys()): - logger.info(" %s = %s", key, str(result[key])) - - if args.benchmark and args.mode == "accuracy": - print("Batch size = %d" % args.eval_batch_size) - print("Accuracy: %.5f" % (result['perplexity'].item())) - - return result['perplexity'].item() - -def main(): - parser = argparse.ArgumentParser() - - parser.add_argument('--model_path', type=str, required=True, - help='Pre-trained bert model onnx file.') - parser.add_argument("--data_path", type=str, required=True, - help="Input evaluation data file to evaluate the perplexity on (a text file).") - parser.add_argument("--model_name_or_path", type=str, - help="The model checkpoint for weights initialization.") - parser.add_argument("--cache_dir", default="", type=str, - help="Optional directory to store the pre-trained models downloaded from s3 (instead of the default one)") - parser.add_argument("--block_size", default=1024, type=int, - help="Optional input sequence length after tokenization." - "The training dataset will be truncated in block of this size for training." 
- "Default to the model max input length for single sentence inputs (take into account special tokens).") - parser.add_argument("--eval_batch_size", default=1, type=int, - help="Batch size for evaluation.") - parser.add_argument('--overwrite_cache', action='store_true', - help="Overwrite the cached training and evaluation sets") - parser.add_argument('--tune',action='store_true', default=False, - help='Get bert tuning quantization model with neural_compressor.') - parser.add_argument('--output_model',type=str, default='gpt2_tune.onnx', - help='output model path and name') - parser.add_argument('--benchmark',action='store_true', default=False, - help='Get benchmark performance of quantized model.') - parser.add_argument('--mode', type=str, - help="benchmark mode of performance or accuracy") - parser.add_argument("--warmup_steps", default=10, type=int, - help="Linear warmup over warmup_steps.") - parser.add_argument('-i', "--iter", default=0, type=int, - help='For accuracy measurement only.') - args = parser.parse_args() - args.device = torch.device("cpu") - - # Setup logging - logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', - datefmt = '%m/%d/%Y %H:%M:%S', - level = logging.INFO) - - tokenizer = GPT2Tokenizer.from_pretrained(args.model_name_or_path, - use_fast=True, - cache_dir=args.cache_dir if args.cache_dir else None) - if args.block_size <= 0: - args.block_size = tokenizer.max_len_single_sentence # Our input block size will be the max possible for the model - args.block_size = min(args.block_size, tokenizer.max_len_single_sentence) - - logger.info("Training/evaluation parameters %s", args) - - ds = load_and_cache_examples(args, tokenizer, evaluate=True) - - def eval_func(model): - return evaluate(args, model, tokenizer) - - if args.benchmark: - model = onnx.load(args.model_path) - if args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(iteration=100, - cores_per_instance=4, - num_of_instance=1) - b_dataloader = DataLoader(ds, args.eval_batch_size) - fit(model, conf, b_dataloader=b_dataloader) - else: - evaluate(args, model, tokenizer) - - if args.tune: - # optimize model - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - opt_options = FusionOptions('gpt2') - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - args.model_path, - 'gpt2', - num_heads=12, - hidden_size=768, - optimization_options=opt_options) - model = model_optimizer.model - - # check the optimized model is valid - try: - ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) - except Exception as e: - logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. 
" \ - "Try to upgrade onnxruntime to avoid this error") - model = onnx.load(args.model_path) - - from neural_compressor import quantization - from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig - from neural_compressor.utils.constant import FP32 - accuracy_criterion = AccuracyCriterion() - accuracy_criterion.higher_is_better = False - accuracy_criterion.relative = 0.11 - fp32_op_names = None - if args.model_name_or_path == 'distilgpt2': - fp32_op_names = ['Attention_5_matmul'] - elif args.model_name_or_path == 'gpt2': - fp32_op_names = ['Attention_11_matmul'] - config = PostTrainingQuantConfig(approach='static', - op_type_dict={'Add':FP32}, - accuracy_criterion=accuracy_criterion, - op_name_dict={op_name:FP32 for op_name in fp32_op_names} if fp32_op_names else None,) - q_model = quantization.fit(model, - config, - eval_func=eval_func, - calib_dataloader=DataLoader(ds, args.eval_batch_size)) - q_model.save(args.output_model) - - -if __name__ == "__main__": +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. +# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the library models for language modeling on a text file (GPT, GPT-2, BERT, RoBERTa). +GPT and GPT-2 are fine-tuned using a causal language modeling (CLM) loss while BERT and RoBERTa are fine-tuned +using a masked language modeling (MLM) loss. 
+""" + +from __future__ import absolute_import, division, print_function + +import argparse +import logging +import os +import pickle +import numpy as np +import torch +from torch.utils.data import DataLoader, Dataset, SequentialSampler +import onnx +import onnxruntime as ort +from torch.nn import CrossEntropyLoss +from tqdm import tqdm +from transformers import GPT2Tokenizer + +logger = logging.getLogger(__name__) + + +class TextDataset(Dataset): + def __init__(self, tokenizer, args, file_path='train', block_size=1024): + assert os.path.isfile(file_path) + directory, filename = os.path.split(file_path) + if not os.path.exists("./dataset_cached"): + os.makedirs("./dataset_cached") + cached_features_file = os.path.join("./dataset_cached", + args.model_name_or_path.split('/')[-1] + '_cached_lm_' + str(block_size) + '_' + filename) + + if os.path.exists(cached_features_file) and not args.overwrite_cache: + logger.info("Loading features from cached file %s", cached_features_file) + with open(cached_features_file, 'rb') as handle: + self.examples = pickle.load(handle) + else: + logger.info("Creating features from dataset file at %s", directory) + + self.examples = [] + with open(file_path, encoding="utf-8") as f: + text = f.read() + + tokenized_text = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text)) + + for i in range(0, len(tokenized_text)-block_size+1, block_size): # Truncate in block of block_size + self.examples.append(tokenizer.build_inputs_with_special_tokens(tokenized_text[i:i+block_size])) + # Note that we are losing the last truncated example here for the sake of simplicity (no padding) + # If your dataset is small, first you should loook for a bigger one :-) and second you + # can change this behavior by adding (model specific) padding. + + logger.info("Saving features into cached file %s", cached_features_file) + with open(cached_features_file, 'wb') as handle: + pickle.dump(self.examples, handle, protocol=pickle.HIGHEST_PROTOCOL) + + def __len__(self): + return len(self.examples) + + def __getitem__(self, item): + inputs = torch.tensor(self.examples[item]) + inputs = np.array(inputs) + inputs = np.expand_dims(inputs, axis=0) + return inputs, inputs + +def load_and_cache_examples(args, tokenizer, evaluate=False): + dataset = TextDataset(tokenizer, args, file_path=args.data_path, block_size=args.block_size) + return dataset + +def evaluate(args, model, tokenizer, prefix=""): + eval_dataset = load_and_cache_examples(args, tokenizer, evaluate=True) + + eval_sampler = SequentialSampler(eval_dataset) + eval_dataloader = DataLoader(eval_dataset, sampler=eval_sampler, batch_size=args.eval_batch_size) + + # Eval! 
+ logger.info("***** Running evaluation {} *****".format(prefix)) + logger.info(" Num examples = %d", len(eval_dataset)) + logger.info(" Batch size = %d", args.eval_batch_size) + eval_loss = 0.0 + nb_eval_steps = 0 + import timeit + total_time = 0.0 + + options = ort.SessionOptions() + session = ort.InferenceSession(model.SerializeToString(), options, + providers=ort.get_available_providers()) + len_outputs = len(session.get_outputs()) + len_inputs = len(session.get_inputs()) + inputs_names = [session.get_inputs()[i].name for i in range(len_inputs)] + ort_inputs = {} + + for idx, (inputs, labels) in enumerate(tqdm(eval_dataloader, desc="Evaluating")): + if nb_eval_steps >= args.warmup_steps: + start = timeit.default_timer() + inputs = inputs.to(args.device) + labels = labels.to(args.device) + for i in range(len_inputs): + inputs = np.array(inputs) + ort_inputs.update({inputs_names[i]: inputs}) + predictions = session.run(None, ort_inputs) + lm_logits = predictions[0] + lm_logits = torch.from_numpy(lm_logits) + # Shift so that tokens < n predict n + shift_logits = lm_logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss(ignore_index=-1) + lm_loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), + shift_labels.view(-1)) + + if nb_eval_steps >= args.warmup_steps: + total_time += (timeit.default_timer() - start) + eval_loss += lm_loss.mean().item() + nb_eval_steps += 1 + + if args.iter > 0 and nb_eval_steps > (args.warmup_steps + args.iter): + break + + if nb_eval_steps >= args.warmup_steps: + perf = (nb_eval_steps - args.warmup_steps) * args.eval_batch_size / total_time + if args.eval_batch_size == 1: + print('Latency: %.3f ms' % (total_time / (nb_eval_steps - args.warmup_steps) * 1000)) + print("Throughput: {} samples/s".format(perf)) + else: + logger.info("*****no performance, please check dataset length and warmup number *****") + + eval_loss = eval_loss / nb_eval_steps + perplexity = torch.exp(torch.tensor(eval_loss)) + + result = { + "perplexity": perplexity + } + logger.info("***** Eval results {} *****".format(prefix)) + for key in sorted(result.keys()): + logger.info(" %s = %s", key, str(result[key])) + + if args.benchmark and args.mode == "accuracy": + print("Batch size = %d" % args.eval_batch_size) + print("Accuracy: %.5f" % (result['perplexity'].item())) + + return result['perplexity'].item() + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument('--model_path', type=str, required=True, + help='Pre-trained bert model onnx file.') + parser.add_argument("--data_path", type=str, required=True, + help="Input evaluation data file to evaluate the perplexity on (a text file).") + parser.add_argument("--model_name_or_path", type=str, + help="The model checkpoint for weights initialization.") + parser.add_argument("--cache_dir", default="", type=str, + help="Optional directory to store the pre-trained models downloaded from s3 (instead of the default one)") + parser.add_argument("--block_size", default=1024, type=int, + help="Optional input sequence length after tokenization." + "The training dataset will be truncated in block of this size for training." 
+ "Default to the model max input length for single sentence inputs (take into account special tokens).") + parser.add_argument("--eval_batch_size", default=1, type=int, + help="Batch size for evaluation.") + parser.add_argument('--overwrite_cache', action='store_true', + help="Overwrite the cached training and evaluation sets") + parser.add_argument('--tune',action='store_true', default=False, + help='Get bert tuning quantization model with neural_compressor.') + parser.add_argument('--output_model',type=str, default='gpt2_tune.onnx', + help='output model path and name') + parser.add_argument('--benchmark',action='store_true', default=False, + help='Get benchmark performance of quantized model.') + parser.add_argument('--mode', type=str, + help="benchmark mode of performance or accuracy") + parser.add_argument("--warmup_steps", default=10, type=int, + help="Linear warmup over warmup_steps.") + parser.add_argument('-i', "--iter", default=0, type=int, + help='For accuracy measurement only.') + args = parser.parse_args() + args.device = torch.device("cpu") + + # Setup logging + logging.basicConfig(format = '%(asctime)s - %(levelname)s - %(name)s - %(message)s', + datefmt = '%m/%d/%Y %H:%M:%S', + level = logging.INFO) + + tokenizer = GPT2Tokenizer.from_pretrained(args.model_name_or_path, + use_fast=True, + cache_dir=args.cache_dir if args.cache_dir else None) + if args.block_size <= 0: + args.block_size = tokenizer.max_len_single_sentence # Our input block size will be the max possible for the model + args.block_size = min(args.block_size, tokenizer.max_len_single_sentence) + + logger.info("Training/evaluation parameters %s", args) + + ds = load_and_cache_examples(args, tokenizer, evaluate=True) + + def eval_func(model): + return evaluate(args, model, tokenizer) + + if args.benchmark: + model = onnx.load(args.model_path) + if args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(iteration=100, + cores_per_instance=4, + num_of_instance=1) + b_dataloader = DataLoader(ds, args.eval_batch_size) + fit(model, conf, b_dataloader=b_dataloader) + else: + evaluate(args, model, tokenizer) + + if args.tune: + # optimize model + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('gpt2') + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + args.model_path, + 'gpt2', + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + model = model_optimizer.model + + # check the optimized model is valid + try: + ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers()) + except Exception as e: + logger.warning("Optimized model is invalid: {}. ".format(e)) + logger.warning("Model optimizer will be skipped. 
" \ + "Try to upgrade onnxruntime to avoid this error") + model = onnx.load(args.model_path) + + from neural_compressor import quantization + from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig + from neural_compressor.utils.constant import FP32 + accuracy_criterion = AccuracyCriterion() + accuracy_criterion.higher_is_better = False + accuracy_criterion.relative = 0.11 + fp32_op_names = None + if args.model_name_or_path == 'distilgpt2': + fp32_op_names = ['Attention_5_matmul'] + elif args.model_name_or_path == 'gpt2': + fp32_op_names = ['Attention_11_matmul'] + config = PostTrainingQuantConfig(approach='static', + op_type_dict={'Add':FP32}, + accuracy_criterion=accuracy_criterion, + op_name_dict={op_name:FP32 for op_name in fp32_op_names} if fp32_op_names else None,) + q_model = quantization.fit(model, + config, + eval_func=eval_func, + calib_dataloader=DataLoader(ds, args.eval_batch_size)) + q_model.save(args.output_model) + + +if __name__ == "__main__": main() \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/prepare_model.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/prepare_model.py index c5ed2bc5d2a..7b115ab2199 100644 --- a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/prepare_model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/prepare_model.py @@ -1,80 +1,80 @@ -import argparse -import os - -import numpy as np -import torch -from transformers import GPT2LMHeadModel, GPT2Tokenizer - -MODELS = { - 'gpt2':(GPT2LMHeadModel, GPT2Tokenizer, 'gpt2'), - 'distilgpt2': (GPT2LMHeadModel, GPT2Tokenizer, 'distilgpt2') -} -data_dir = 'test_data_set_0' - - -def flatten(inputs): - return [[flatten(i) for i in inputs] if isinstance(inputs, (list, tuple)) else inputs] - - -def update_flatten_list(inputs, res_list): - for i in inputs: - res_list.append(i) if not isinstance(i, (list, tuple)) else update_flatten_list(i, res_list) - return res_list - - -def save_model(name, model, inputs, outputs, input_names=None, output_names=None, **kwargs): - if hasattr(model, 'train'): - model.train(False) - - inputs_flatten = flatten(inputs) - inputs_flatten = update_flatten_list(inputs_flatten, []) - outputs_flatten = flatten(outputs) - outputs_flatten = update_flatten_list(outputs_flatten, []) - if input_names is None: - input_names = [] - for i, _ in enumerate(inputs_flatten): - input_names.append('input' + str(i+1)) - else: - np.testing.assert_equal(len(input_names), len(inputs_flatten), - "Number of input names provided is not equal to the number of inputs.") - - if output_names is None: - output_names = [] - for i, _ in enumerate(outputs_flatten): - output_names.append('output' + str(i+1)) - else: - np.testing.assert_equal(len(output_names), len(outputs_flatten), - "Number of output names provided is not equal to the number of output.") - - if isinstance(model, torch.jit.ScriptModule): - torch.onnx._export(model, inputs, name, verbose=True, input_names=input_names, - output_names=output_names, **kwargs) - else: - torch.onnx.export(model, inputs, name, verbose=True, input_names=input_names, - output_names=output_names, **kwargs) - 
assert os.path.exists(name), f"Export failed, {name} doesn't exist!" - - -def gpt2_test(): - parser = argparse.ArgumentParser() - parser.add_argument('--input_model', type=str, required=False, default="gpt2") - parser.add_argument('--output_model', type=str, required=True, - help='model name or path.') - args = parser.parse_args() - - model_class, tokenizer_class, pretrained_weights = MODELS[args.input_model] - # Load pretrained model/tokenizer - tokenizer = tokenizer_class.from_pretrained(pretrained_weights) - model = model_class.from_pretrained(pretrained_weights) - model.eval() - # Encode text - # Add special tokens takes care of adding [CLS], [SEP], ... tokens in the right way for each model. - input_ids_1 = torch.tensor( - [[tokenizer.encode("Here is some text to encode Hello World", add_special_tokens=True)]]) - with torch.no_grad(): - output_1 = model(input_ids_1) # Models outputs are now tuples - - save_model(args.output_model, model.cpu(), input_ids_1, output_1, opset_version=14, input_names=['input1'], dynamic_axes={'input1': [0, 1, 2]}) - - -gpt2_test() +import argparse +import os + +import numpy as np +import torch +from transformers import GPT2LMHeadModel, GPT2Tokenizer + +MODELS = { + 'gpt2':(GPT2LMHeadModel, GPT2Tokenizer, 'gpt2'), + 'distilgpt2': (GPT2LMHeadModel, GPT2Tokenizer, 'distilgpt2') +} +data_dir = 'test_data_set_0' + + +def flatten(inputs): + return [[flatten(i) for i in inputs] if isinstance(inputs, (list, tuple)) else inputs] + + +def update_flatten_list(inputs, res_list): + for i in inputs: + res_list.append(i) if not isinstance(i, (list, tuple)) else update_flatten_list(i, res_list) + return res_list + + +def save_model(name, model, inputs, outputs, input_names=None, output_names=None, **kwargs): + if hasattr(model, 'train'): + model.train(False) + + inputs_flatten = flatten(inputs) + inputs_flatten = update_flatten_list(inputs_flatten, []) + outputs_flatten = flatten(outputs) + outputs_flatten = update_flatten_list(outputs_flatten, []) + if input_names is None: + input_names = [] + for i, _ in enumerate(inputs_flatten): + input_names.append('input' + str(i+1)) + else: + np.testing.assert_equal(len(input_names), len(inputs_flatten), + "Number of input names provided is not equal to the number of inputs.") + + if output_names is None: + output_names = [] + for i, _ in enumerate(outputs_flatten): + output_names.append('output' + str(i+1)) + else: + np.testing.assert_equal(len(output_names), len(outputs_flatten), + "Number of output names provided is not equal to the number of output.") + + if isinstance(model, torch.jit.ScriptModule): + torch.onnx._export(model, inputs, name, verbose=True, input_names=input_names, + output_names=output_names, **kwargs) + else: + torch.onnx.export(model, inputs, name, verbose=True, input_names=input_names, + output_names=output_names, **kwargs) + assert os.path.exists(name), f"Export failed, {name} doesn't exist!" + + +def gpt2_test(): + parser = argparse.ArgumentParser() + parser.add_argument('--input_model', type=str, required=False, default="gpt2") + parser.add_argument('--output_model', type=str, required=True, + help='model name or path.') + args = parser.parse_args() + + model_class, tokenizer_class, pretrained_weights = MODELS[args.input_model] + # Load pretrained model/tokenizer + tokenizer = tokenizer_class.from_pretrained(pretrained_weights) + model = model_class.from_pretrained(pretrained_weights) + model.eval() + # Encode text + # Add special tokens takes care of adding [CLS], [SEP], ... 
tokens in the right way for each model. + input_ids_1 = torch.tensor( + [[tokenizer.encode("Here is some text to encode Hello World", add_special_tokens=True)]]) + with torch.no_grad(): + output_1 = model(input_ids_1) # Models outputs are now tuples + + save_model(args.output_model, model.cpu(), input_ids_1, output_1, opset_version=14, input_names=['input1'], dynamic_axes={'input1': [0, 1, 2]}) + + +gpt2_test() diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/requirements.txt similarity index 94% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/requirements.txt index 77e932f112b..30bc5148c17 100644 --- a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/requirements.txt @@ -1,8 +1,8 @@ -transformers -accelerate -onnx -onnxruntime -coloredlogs -torch -sympy +transformers +accelerate +onnx +onnxruntime +coloredlogs +torch +sympy onnxruntime-extensions; python_version < '3.11' \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/language_modeling/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py index f3881bb1fa4..25e56735215 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/main.py @@ -1,502 +1,502 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Fine-tuning the library models for question answering. -""" -# You can also adapt this script on your own question answering task. Pointers for this are left as comments. - -import numpy as np -import logging -import os -from typing import Callable, Dict, List, Optional -import onnx -import sys -from dataclasses import dataclass, field -from functools import partial -from typing import Optional - -import datasets -import transformers -from datasets import load_dataset -from transformers import AutoTokenizer, EvalPrediction, HfArgumentParser, PreTrainedTokenizer, TrainingArguments -from transformers.utils import check_min_version -from transformers.utils.versions import require_version -import onnxruntime - -from evaluate import load -from utils_model import ORTModel -from utils_qa import postprocess_qa_predictions - -from neural_compressor.data import DataLoader - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.15.0") - -require_version( - "datasets>=1.8.0", "To fix: pip install -r examples/onnxruntime/optimization/question-answering/requirements.txt" -) - -logger = logging.getLogger(__name__) - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - input_model: str = field( - metadata={"help": "Path to onnx model"} - ) - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Path to directory to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." 
- }, - ) - tune: bool = field( - default=False, - metadata={"help": ("INC tune")}, - ) - benchmark: bool = field( - default=False, - metadata={"help": ("INC benchmark")}, - ) - mode: str = field( - default='performance', - metadata={"help": ("INC benchmark mode")}, - ) - save_path: str = field( - default=None, - metadata={"help": ("onnx int8 model path")}, - ) - num_heads: int = field( - default=12, - metadata={"help": ("onnx model optimize num_heads")}, - ) - hidden_size: int = field( - default=768, - metadata={"help": ("onnx model optimize hidden_size")}, - ) - batch_size: int = field( - default=1, - metadata={"help": ("batch size for benchmark")}, - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - """ - - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."}) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to evaluate the perplexity on (a text file)."}, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - max_seq_length: int = field( - default=512, - metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - }, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " - "be faster on GPU but will be slower on TPU)." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - }, - ) - max_predict_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." - }, - ) - version_2_with_negative: bool = field( - default=False, metadata={"help": "If true, some of the examples do not have an answer."} - ) - null_score_diff_threshold: float = field( - default=0.0, - metadata={ - "help": "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." 
- }, - ) - doc_stride: int = field( - default=256, - metadata={"help": "When splitting up a long document into chunks, how much stride to take between chunks."}, - ) - n_best_size: int = field( - default=20, - metadata={"help": "The total number of n-best predictions to generate when looking for an answer."}, - ) - max_answer_length: int = field( - default=30, - metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." - }, - ) - - def __post_init__(self): - if ( - self.dataset_name is None - and self.train_file is None - and self.validation_file is None - and self.test_file is None - ): - raise ValueError("Need either a dataset name or a training/validation file/test_file.") - else: - if self.train_file is not None: - extension = self.train_file.split(".")[-1] - assert extension in ["csv", "json"], "`train_file` should be a csv or a json file." - if self.validation_file is not None: - extension = self.validation_file.split(".")[-1] - assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." - if self.test_file is not None: - extension = self.test_file.split(".")[-1] - assert extension in ["csv", "json"], "`test_file` should be a csv or a json file." - -class SQuADDataset(): - def __init__( - self, - dataset, - model, - label_names: Optional[List[str]] = None, - ): - self.dataset = dataset - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self._process_dataset() - - def _process_dataset(self): - self.label = [] - self.onnx_inputs = [] - for inputs in self.dataset: - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - self.label.append(labels) - self.onnx_inputs.append([np.array(inputs[key]) for key in self.onnx_input_names if key in inputs]) - - def __getitem__(self, index): - return tuple(self.onnx_inputs[index]), self.label[index] - - def __len__(self): - assert len(self.label) == len(self.onnx_inputs) - return len(self.onnx_inputs) - -def main(): - # We now keep distinct sets of args, for a cleaner separation of concerns. - parser = HfArgumentParser( - (ModelArguments, DataTrainingArguments, TrainingArguments) - ) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. 
- model_args, data_args, training_args = parser.parse_json_file( - json_file=os.path.abspath(sys.argv[1]) - ) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - datasets.utils.logging.set_verbosity(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - - if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - - os.makedirs(training_args.output_dir, exist_ok=True) - - tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path) - - # Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation step(s) - # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) - # or just provide the name of one of the public datasets available on the hub at - # https://huggingface.co/datasets/ (the dataset will be downloaded automatically from the datasets Hub). - # - # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called - # 'text' is found. You can easily tweak this behavior (see below). - # - # In distributed training, the load_dataset function guarantee that only one local process can concurrently - # download the dataset. - if data_args.dataset_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset( - data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir - ) - else: - data_files = {} - if data_args.train_file is not None: - data_files["train"] = data_args.train_file - extension = data_args.train_file.split(".")[-1] - if data_args.validation_file is not None: - data_files["validation"] = data_args.validation_file - extension = data_args.validation_file.split(".")[-1] - if data_args.test_file is not None: - data_files["test"] = data_args.test_file - extension = data_args.test_file.split(".")[-1] - raw_datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) - # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) - # at https://huggingface.co/docs/datasets/loading_datasets.html. - - # Preprocessing the datasets. - column_names = raw_datasets["validation"].column_names - - question_column_name = "question" if "question" in column_names else column_names[0] - context_column_name = "context" if "context" in column_names else column_names[1] - answer_column_name = "answers" if "answers" in column_names else column_names[2] - - # Validation preprocessing - def prepare_validation_features(examples, tokenizer: PreTrainedTokenizer): - # Some of the questions have lots of whitespace on the left, which is not useful and will make the - # truncation of the context fail (the tokenized question will take a lots of space). 
So we remove that - # left whitespace - examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] - - # Padding side determines if we do (question|context) or (context|question). - pad_on_right = tokenizer.padding_side == "right" - - # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This - # results in one example possible giving several features when a context is long, each of those features - # having a context that overlaps a bit the context of the previous feature. - tokenized_examples = tokenizer( - examples[question_column_name if pad_on_right else context_column_name], - examples[context_column_name if pad_on_right else question_column_name], - truncation="only_second" if pad_on_right else "only_first", - max_length=min(data_args.max_seq_length, tokenizer.model_max_length), - stride=data_args.doc_stride, - return_overflowing_tokens=True, - return_offsets_mapping=True, - padding="max_length" if data_args.pad_to_max_length else False, - ) - - # Since one example might give us several features if it has a long context, we need a map from a feature to - # its corresponding example. This key gives us just that. - sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") - - # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the - # corresponding example_id and we will store the offset mappings. - tokenized_examples["example_id"] = [] - - for i in range(len(tokenized_examples["input_ids"])): - # Grab the sequence corresponding to that example (to know what is the context and what is the question) - sequence_ids = tokenized_examples.sequence_ids(i) - context_index = 1 if pad_on_right else 0 - - # One example can give several spans, this is the index of the example containing this span of text. - sample_index = sample_mapping[i] - tokenized_examples["example_id"].append(examples["id"][sample_index]) - - # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token - # position is part of the context or not. - tokenized_examples["offset_mapping"][i] = [ - (o if sequence_ids[k] == context_index else None) - for k, o in enumerate(tokenized_examples["offset_mapping"][i]) - ] - - return tokenized_examples - - # Preprocess the evaluation dataset - if "validation" not in raw_datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_examples = raw_datasets["validation"] - if data_args.max_eval_samples is not None: - eval_examples = eval_examples.select(range(data_args.max_eval_samples)) - eval_dataset = eval_examples.map( - partial(prepare_validation_features, tokenizer=tokenizer), - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on validation dataset", - ) - if data_args.max_eval_samples is not None: - # During Feature creation dataset samples might increase, we will select required samples again - eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) - - # Post-processing: - def post_processing_function(examples, features, predictions, stage="eval"): - # Post-processing: we match the start logits and end logits to answers in the original context. 
- predictions = postprocess_qa_predictions( - examples=examples, - features=features, - predictions=predictions, - version_2_with_negative=data_args.version_2_with_negative, - n_best_size=data_args.n_best_size, - max_answer_length=data_args.max_answer_length, - null_score_diff_threshold=data_args.null_score_diff_threshold, - output_dir=training_args.output_dir, - log_level=log_level, - prefix=stage, - ) - # Format the result to the format the metric expects. - if data_args.version_2_with_negative: - formatted_predictions = [ - {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items() - ] - else: - formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()] - - references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples] - return EvalPrediction(predictions=formatted_predictions, label_ids=references) - - metric = load("squad_v2" if data_args.version_2_with_negative else "squad") - - def compute_metrics(p: EvalPrediction): - return metric.compute(predictions=p.predictions, references=p.label_ids) - - - def eval_func(model, *args): - # Evaluation - logger.info("*** Evaluate ***") - - ort_model = ORTModel( - model, - compute_metrics=compute_metrics, - label_names=["start_positions", "end_positions"], - ) - outputs = ort_model.evaluation_loop(eval_dataset) - predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions) - metrics = compute_metrics(predictions) - return metrics['f1'] - - if model_args.tune: - # optimize model - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - opt_options = FusionOptions('bert') - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - model_args.input_model, - 'bert', - num_heads=model_args.num_heads, - hidden_size=model_args.hidden_size, - optimization_options=opt_options) - model = model_optimizer.model - - # check the optimized model is valid - try: - onnxruntime.InferenceSession(model.SerializeToString(), providers=onnxruntime.get_available_providers()) - except Exception as e: - logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. " \ - "Try to upgrade onnxruntime to avoid this error") - model = onnx.load(model_args.input_model) - - from neural_compressor import quantization, PostTrainingQuantConfig - calib_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) - config = PostTrainingQuantConfig(approach='dynamic') - q_model = quantization.fit(model, - config, - eval_func=eval_func) - q_model.save(model_args.save_path) - - if model_args.benchmark: - model = onnx.load(model_args.input_model) - if model_args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - b_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) - conf = BenchmarkConfig(iteration=100, - cores_per_instance=28, - num_of_instance=1) - b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) - fit(model, conf, b_dataloader=b_dataloader) - elif model_args.mode == 'accuracy': - eval_f1 = eval_func(model) - print("Batch size = %d" % model_args.batch_size) - print("Accuracy: %.5f" % eval_f1) - - -if __name__ == "__main__": +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team. All rights reserved. 
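For quick reference, the tuning path of the relocated run_qa.py above reduces to the sketch below; "model.onnx", "model_int8.onnx" and the eval_func body are placeholders for illustration and are not part of this diff.

# Minimal sketch of the dynamic post-training quantization flow used in
# run_qa.py above; the paths and the eval_func body are placeholders.
import onnx
from neural_compressor import PostTrainingQuantConfig, quantization

fp32_model = onnx.load("model.onnx")

def eval_func(model):
    # Return a single scalar accuracy (F1 in the example above); INC uses it
    # to accept or reject each tuning attempt.
    return 1.0

config = PostTrainingQuantConfig(approach="dynamic")
q_model = quantization.fit(fp32_model, config, eval_func=eval_func)
q_model.save("model_int8.onnx")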
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the library models for question answering. +""" +# You can also adapt this script on your own question answering task. Pointers for this are left as comments. + +import numpy as np +import logging +import os +from typing import Callable, Dict, List, Optional +import onnx +import sys +from dataclasses import dataclass, field +from functools import partial +from typing import Optional + +import datasets +import transformers +from datasets import load_dataset +from transformers import AutoTokenizer, EvalPrediction, HfArgumentParser, PreTrainedTokenizer, TrainingArguments +from transformers.utils import check_min_version +from transformers.utils.versions import require_version +import onnxruntime + +from evaluate import load +from utils_model import ORTModel +from utils_qa import postprocess_qa_predictions + +from neural_compressor.data import DataLoader + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.15.0") + +require_version( + "datasets>=1.8.0", "To fix: pip install -r examples/onnxruntime/optimization/question-answering/requirements.txt" +) + +logger = logging.getLogger(__name__) + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + input_model: str = field( + metadata={"help": "Path to onnx model"} + ) + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Path to directory to store the pretrained models downloaded from huggingface.co"}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." 
+ }, + ) + tune: bool = field( + default=False, + metadata={"help": ("INC tune")}, + ) + benchmark: bool = field( + default=False, + metadata={"help": ("INC benchmark")}, + ) + mode: str = field( + default='performance', + metadata={"help": ("INC benchmark mode")}, + ) + save_path: str = field( + default=None, + metadata={"help": ("onnx int8 model path")}, + ) + num_heads: int = field( + default=12, + metadata={"help": ("onnx model optimize num_heads")}, + ) + hidden_size: int = field( + default=768, + metadata={"help": ("onnx model optimize hidden_size")}, + ) + batch_size: int = field( + default=1, + metadata={"help": ("batch size for benchmark")}, + ) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. + """ + + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."}) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to evaluate the perplexity on (a text file)."}, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_seq_length: int = field( + default=512, + metadata={ + "help": "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + }, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " + "be faster on GPU but will be slower on TPU)." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_predict_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + }, + ) + version_2_with_negative: bool = field( + default=False, metadata={"help": "If true, some of the examples do not have an answer."} + ) + null_score_diff_threshold: float = field( + default=0.0, + metadata={ + "help": "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." 
+ }, + ) + doc_stride: int = field( + default=256, + metadata={"help": "When splitting up a long document into chunks, how much stride to take between chunks."}, + ) + n_best_size: int = field( + default=20, + metadata={"help": "The total number of n-best predictions to generate when looking for an answer."}, + ) + max_answer_length: int = field( + default=30, + metadata={ + "help": "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." + }, + ) + + def __post_init__(self): + if ( + self.dataset_name is None + and self.train_file is None + and self.validation_file is None + and self.test_file is None + ): + raise ValueError("Need either a dataset name or a training/validation file/test_file.") + else: + if self.train_file is not None: + extension = self.train_file.split(".")[-1] + assert extension in ["csv", "json"], "`train_file` should be a csv or a json file." + if self.validation_file is not None: + extension = self.validation_file.split(".")[-1] + assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." + if self.test_file is not None: + extension = self.test_file.split(".")[-1] + assert extension in ["csv", "json"], "`test_file` should be a csv or a json file." + +class SQuADDataset(): + def __init__( + self, + dataset, + model, + label_names: Optional[List[str]] = None, + ): + self.dataset = dataset + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + self._process_dataset() + + def _process_dataset(self): + self.label = [] + self.onnx_inputs = [] + for inputs in self.dataset: + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + self.label.append(labels) + self.onnx_inputs.append([np.array(inputs[key]) for key in self.onnx_input_names if key in inputs]) + + def __getitem__(self, index): + return tuple(self.onnx_inputs[index]), self.label[index] + + def __len__(self): + assert len(self.label) == len(self.onnx_inputs) + return len(self.onnx_inputs) + +def main(): + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser( + (ModelArguments, DataTrainingArguments, TrainingArguments) + ) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
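The SQuADDataset wrapper defined above exists mainly to feed INC's benchmark runner; a condensed sketch of that path follows. SQuADDataset is the class from this file, while eval_dataset and "model.onnx" stand for objects built later in main() and are placeholders here.

# Condensed benchmark path from the example above; eval_dataset and
# "model.onnx" are placeholders for objects created in main().
import onnx
from neural_compressor.benchmark import fit
from neural_compressor.config import BenchmarkConfig
from neural_compressor.data import DataLoader

model = onnx.load("model.onnx")
b_dataset = SQuADDataset(eval_dataset, model,
                         label_names=["start_positions", "end_positions"])
b_dataloader = DataLoader(framework="onnxruntime", dataset=b_dataset, batch_size=1)
fit(model,
    BenchmarkConfig(iteration=100, cores_per_instance=28, num_of_instance=1),
    b_dataloader=b_dataloader)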
+ model_args, data_args, training_args = parser.parse_json_file( + json_file=os.path.abspath(sys.argv[1]) + ) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + + + if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + + os.makedirs(training_args.output_dir, exist_ok=True) + + tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path) + + # Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation step(s) + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) + # or just provide the name of one of the public datasets available on the hub at + # https://huggingface.co/datasets/ (the dataset will be downloaded automatically from the datasets Hub). + # + # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called + # 'text' is found. You can easily tweak this behavior (see below). + # + # In distributed training, the load_dataset function guarantee that only one local process can concurrently + # download the dataset. + if data_args.dataset_name is not None: + # Downloading and loading a dataset from the hub. + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) + else: + data_files = {} + if data_args.train_file is not None: + data_files["train"] = data_args.train_file + extension = data_args.train_file.split(".")[-1] + if data_args.validation_file is not None: + data_files["validation"] = data_args.validation_file + extension = data_args.validation_file.split(".")[-1] + if data_args.test_file is not None: + data_files["test"] = data_args.test_file + extension = data_args.test_file.split(".")[-1] + raw_datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) + # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) + # at https://huggingface.co/docs/datasets/loading_datasets.html. + + # Preprocessing the datasets. + column_names = raw_datasets["validation"].column_names + + question_column_name = "question" if "question" in column_names else column_names[0] + context_column_name = "context" if "context" in column_names else column_names[1] + answer_column_name = "answers" if "answers" in column_names else column_names[2] + + # Validation preprocessing + def prepare_validation_features(examples, tokenizer: PreTrainedTokenizer): + # Some of the questions have lots of whitespace on the left, which is not useful and will make the + # truncation of the context fail (the tokenized question will take a lots of space). 
So we remove that + # left whitespace + examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] + + # Padding side determines if we do (question|context) or (context|question). + pad_on_right = tokenizer.padding_side == "right" + + # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This + # results in one example possible giving several features when a context is long, each of those features + # having a context that overlaps a bit the context of the previous feature. + tokenized_examples = tokenizer( + examples[question_column_name if pad_on_right else context_column_name], + examples[context_column_name if pad_on_right else question_column_name], + truncation="only_second" if pad_on_right else "only_first", + max_length=min(data_args.max_seq_length, tokenizer.model_max_length), + stride=data_args.doc_stride, + return_overflowing_tokens=True, + return_offsets_mapping=True, + padding="max_length" if data_args.pad_to_max_length else False, + ) + + # Since one example might give us several features if it has a long context, we need a map from a feature to + # its corresponding example. This key gives us just that. + sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + + # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the + # corresponding example_id and we will store the offset mappings. + tokenized_examples["example_id"] = [] + + for i in range(len(tokenized_examples["input_ids"])): + # Grab the sequence corresponding to that example (to know what is the context and what is the question) + sequence_ids = tokenized_examples.sequence_ids(i) + context_index = 1 if pad_on_right else 0 + + # One example can give several spans, this is the index of the example containing this span of text. + sample_index = sample_mapping[i] + tokenized_examples["example_id"].append(examples["id"][sample_index]) + + # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token + # position is part of the context or not. + tokenized_examples["offset_mapping"][i] = [ + (o if sequence_ids[k] == context_index else None) + for k, o in enumerate(tokenized_examples["offset_mapping"][i]) + ] + + return tokenized_examples + + # Preprocess the evaluation dataset + if "validation" not in raw_datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_examples = raw_datasets["validation"] + if data_args.max_eval_samples is not None: + eval_examples = eval_examples.select(range(data_args.max_eval_samples)) + eval_dataset = eval_examples.map( + partial(prepare_validation_features, tokenizer=tokenizer), + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on validation dataset", + ) + if data_args.max_eval_samples is not None: + # During Feature creation dataset samples might increase, we will select required samples again + eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) + + # Post-processing: + def post_processing_function(examples, features, predictions, stage="eval"): + # Post-processing: we match the start logits and end logits to answers in the original context. 
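The stride/overflow behaviour that prepare_validation_features relies on can be reproduced in isolation; the checkpoint name below is only an illustration, not something mandated by the example.

# Standalone illustration of the stride/overflow tokenization used above;
# "bert-base-uncased" is an arbitrary checkpoint chosen for the demo.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
encoded = tokenizer(
    ["What does the script quantize?"],
    ["A deliberately long context. " * 200],     # long enough to overflow
    truncation="only_second",
    max_length=384,
    stride=128,
    return_overflowing_tokens=True,
    return_offsets_mapping=True,
    padding="max_length",
)
# One example yields several features; this map ties each feature back to it.
print(len(encoded["input_ids"]), encoded["overflow_to_sample_mapping"])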
+ predictions = postprocess_qa_predictions( + examples=examples, + features=features, + predictions=predictions, + version_2_with_negative=data_args.version_2_with_negative, + n_best_size=data_args.n_best_size, + max_answer_length=data_args.max_answer_length, + null_score_diff_threshold=data_args.null_score_diff_threshold, + output_dir=training_args.output_dir, + log_level=log_level, + prefix=stage, + ) + # Format the result to the format the metric expects. + if data_args.version_2_with_negative: + formatted_predictions = [ + {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items() + ] + else: + formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()] + + references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples] + return EvalPrediction(predictions=formatted_predictions, label_ids=references) + + metric = load("squad_v2" if data_args.version_2_with_negative else "squad") + + def compute_metrics(p: EvalPrediction): + return metric.compute(predictions=p.predictions, references=p.label_ids) + + + def eval_func(model, *args): + # Evaluation + logger.info("*** Evaluate ***") + + ort_model = ORTModel( + model, + compute_metrics=compute_metrics, + label_names=["start_positions", "end_positions"], + ) + outputs = ort_model.evaluation_loop(eval_dataset) + predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions) + metrics = compute_metrics(predictions) + return metrics['f1'] + + if model_args.tune: + # optimize model + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('bert') + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + model_args.input_model, + 'bert', + num_heads=model_args.num_heads, + hidden_size=model_args.hidden_size, + optimization_options=opt_options) + model = model_optimizer.model + + # check the optimized model is valid + try: + onnxruntime.InferenceSession(model.SerializeToString(), providers=onnxruntime.get_available_providers()) + except Exception as e: + logger.warning("Optimized model is invalid: {}. ".format(e)) + logger.warning("Model optimizer will be skipped. 
" \ + "Try to upgrade onnxruntime to avoid this error") + model = onnx.load(model_args.input_model) + + from neural_compressor import quantization, PostTrainingQuantConfig + calib_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) + config = PostTrainingQuantConfig(approach='dynamic') + q_model = quantization.fit(model, + config, + eval_func=eval_func) + q_model.save(model_args.save_path) + + if model_args.benchmark: + model = onnx.load(model_args.input_model) + if model_args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + b_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) + conf = BenchmarkConfig(iteration=100, + cores_per_instance=28, + num_of_instance=1) + b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) + fit(model, conf, b_dataloader=b_dataloader) + elif model_args.mode == 'accuracy': + eval_f1 = eval_func(model) + print("Batch size = %d" % model_args.batch_size) + print("Accuracy: %.5f" % eval_f1) + + +if __name__ == "__main__": main() \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/prepare_model.py similarity index 98% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/prepare_model.py index 34a37581667..62aa30a5ea7 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/prepare_model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/prepare_model.py @@ -1,78 +1,78 @@ -import argparse -import os - -import torch -from transformers import AutoConfig, AutoModelForQuestionAnswering - -def export_onnx_model(args, model): - with torch.no_grad(): - symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} - if args.input_model in ['distilbert-base-uncased-distilled-squad', - 'deepset/roberta-large-squad2']: - inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple inputs) - inputs['attention_mask']), - args.output_model, # where to save the model (can be a file or file-like object) - opset_version=14, # the ONNX version to export the model - do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask'], - output_names=['start_logits', - 'end_logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names}) - else: - inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64), - 'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple inputs) - inputs['attention_mask'], - inputs['token_type_ids']), - args.output_model, # where to save the model (can be a file or file-like object) - opset_version=14, # the ONNX version to export the model - 
do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask', - 'token_type_ids'], - output_names=['start_logits', - 'end_logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names, - 'token_type_ids' : symbolic_names}) - assert os.path.exists(args.output_model), f"{args.output_model} doesn't exist!" - print("ONNX Model exported to {0}".format(args.output_model)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export huggingface onnx model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input_model', - type=str, - default='mrm8488/spanbert-finetuned-squadv1', - const='mrm8488/spanbert-finetuned-squadv1', - nargs='?', - choices=[ - 'mrm8488/spanbert-finetuned-squadv1', - 'salti/bert-base-multilingual-cased-finetuned-squad', - 'distilbert-base-uncased-distilled-squad', - 'bert-large-uncased-whole-word-masking-finetuned-squad', - 'deepset/roberta-large-squad2' - ], - help='pretrained model name or path ') - parser.add_argument("--output_model", type=str, required=True) - parser.add_argument( - '--max_len', - type=int, - default=512, - help='Maximum length of the sentence pairs') - args = parser.parse_args() - - model = AutoModelForQuestionAnswering.from_pretrained( - args.input_model, - config=AutoConfig.from_pretrained(args.input_model)) - +import argparse +import os + +import torch +from transformers import AutoConfig, AutoModelForQuestionAnswering + +def export_onnx_model(args, model): + with torch.no_grad(): + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + if args.input_model in ['distilbert-base-uncased-distilled-squad', + 'deepset/roberta-large-squad2']: + inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)} + torch.onnx.export(model, # model being run + (inputs['input_ids'], # model input (or a tuple for multiple inputs) + inputs['attention_mask']), + args.output_model, # where to save the model (can be a file or file-like object) + opset_version=14, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'attention_mask'], + output_names=['start_logits', + 'end_logits'], + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'attention_mask' : symbolic_names}) + else: + inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64), + 'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)} + torch.onnx.export(model, # model being run + (inputs['input_ids'], # model input (or a tuple for multiple inputs) + inputs['attention_mask'], + inputs['token_type_ids']), + args.output_model, # where to save the model (can be a file or file-like object) + opset_version=14, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'attention_mask', + 'token_type_ids'], + output_names=['start_logits', + 'end_logits'], + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'attention_mask' : symbolic_names, + 'token_type_ids' : symbolic_names}) + assert os.path.exists(args.output_model), f"{args.output_model} doesn't exist!" 
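Once prepare_model.py has produced an ONNX file, a quick sanity check along these lines confirms that the graph loads and exposes the expected input and output names; the path is a placeholder.

# Sanity-check an exported model such as the one produced above;
# "model.onnx" is a placeholder path.
import onnx
import onnxruntime

onnx.checker.check_model("model.onnx")
session = onnxruntime.InferenceSession(
    "model.onnx", providers=onnxruntime.get_available_providers())
print([inp.name for inp in session.get_inputs()])    # e.g. input_ids, attention_mask
print([out.name for out in session.get_outputs()])   # start_logits, end_logits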
+ print("ONNX Model exported to {0}".format(args.output_model)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export huggingface onnx model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--input_model', + type=str, + default='mrm8488/spanbert-finetuned-squadv1', + const='mrm8488/spanbert-finetuned-squadv1', + nargs='?', + choices=[ + 'mrm8488/spanbert-finetuned-squadv1', + 'salti/bert-base-multilingual-cased-finetuned-squad', + 'distilbert-base-uncased-distilled-squad', + 'bert-large-uncased-whole-word-masking-finetuned-squad', + 'deepset/roberta-large-squad2' + ], + help='pretrained model name or path ') + parser.add_argument("--output_model", type=str, required=True) + parser.add_argument( + '--max_len', + type=int, + default=512, + help='Maximum length of the sentence pairs') + args = parser.parse_args() + + model = AutoModelForQuestionAnswering.from_pretrained( + args.input_model, + config=AutoConfig.from_pretrained(args.input_model)) + export_onnx_model(args, model) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt similarity index 94% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt index 0607f560e8c..f474f54ce83 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt @@ -1,11 +1,11 @@ -onnx -onnxruntime -onnxruntime-extensions; python_version < '3.11' -transformers -accelerate -tensorboard -numpy==1.23.5 -datasets >= 1.8.0 -torch >= 1.9.0 -evaluate +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.11' +transformers +accelerate +tensorboard +numpy==1.23.5 +datasets >= 1.8.0 +torch >= 1.9.0 +evaluate tqdm \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/trainer_qa.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/trainer_qa.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/trainer_qa.py rename to 
examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/trainer_qa.py index 190a20d317d..00b4545294d 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/trainer_qa.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/trainer_qa.py @@ -1,96 +1,96 @@ -# coding=utf-8 -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -A subclass of `IncTrainer` specific to Question-Answering tasks -""" - -from transformers import Trainer -from transformers.trainer_utils import PredictionOutput - - -class QuestionAnsweringTrainer(Trainer): - def __init__(self, *args, eval_examples=None, post_process_function=None, **kwargs): - super().__init__(*args, **kwargs) - self.eval_examples = eval_examples - self.post_process_function = post_process_function - - def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=None, metric_key_prefix: str = "eval"): - eval_dataset = self.eval_dataset if eval_dataset is None else eval_dataset - eval_dataloader = self.get_eval_dataloader(eval_dataset) - eval_examples = self.eval_examples if eval_examples is None else eval_examples - - # Temporarily disable metric computation, we will do it in the loop here. - compute_metrics = self.compute_metrics - self.compute_metrics = None - eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop - try: - output = eval_loop( - eval_dataloader, - description="Evaluation", - # No point gathering the predictions if there are no metrics, otherwise we defer to - # self.args.prediction_loss_only - prediction_loss_only=True if compute_metrics is None else None, - ignore_keys=ignore_keys, - ) - finally: - self.compute_metrics = compute_metrics - - if self.post_process_function is not None and self.compute_metrics is not None: - eval_preds = self.post_process_function(eval_examples, eval_dataset, output.predictions) - metrics = self.compute_metrics(eval_preds) - - # Prefix all keys with metric_key_prefix + '_' - for key in list(metrics.keys()): - if not key.startswith(f"{metric_key_prefix}_"): - metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) - - self.log(metrics) - else: - metrics = {} - - self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, metrics) - return metrics - - def predict(self, predict_dataset, predict_examples, ignore_keys=None, metric_key_prefix: str = "test"): - predict_dataloader = self.get_test_dataloader(predict_dataset) - - # Temporarily disable metric computation, we will do it in the loop here. 
- compute_metrics = self.compute_metrics - self.compute_metrics = None - eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop - try: - output = eval_loop( - predict_dataloader, - description="Prediction", - # No point gathering the predictions if there are no metrics, otherwise we defer to - # self.args.prediction_loss_only - prediction_loss_only=True if compute_metrics is None else None, - ignore_keys=ignore_keys, - ) - finally: - self.compute_metrics = compute_metrics - - if self.post_process_function is None or self.compute_metrics is None: - return output - - predictions = self.post_process_function(predict_examples, predict_dataset, output.predictions, "predict") - metrics = self.compute_metrics(predictions) - - # Prefix all keys with metric_key_prefix + '_' - for key in list(metrics.keys()): - if not key.startswith(f"{metric_key_prefix}_"): - metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) - +# coding=utf-8 +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +A subclass of `IncTrainer` specific to Question-Answering tasks +""" + +from transformers import Trainer +from transformers.trainer_utils import PredictionOutput + + +class QuestionAnsweringTrainer(Trainer): + def __init__(self, *args, eval_examples=None, post_process_function=None, **kwargs): + super().__init__(*args, **kwargs) + self.eval_examples = eval_examples + self.post_process_function = post_process_function + + def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=None, metric_key_prefix: str = "eval"): + eval_dataset = self.eval_dataset if eval_dataset is None else eval_dataset + eval_dataloader = self.get_eval_dataloader(eval_dataset) + eval_examples = self.eval_examples if eval_examples is None else eval_examples + + # Temporarily disable metric computation, we will do it in the loop here. 
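For context, the subclass above is normally wired up as sketched below inside a training script; every name on the right-hand side stands for an object defined elsewhere and is a placeholder, not something taken from this diff.

# Hypothetical wiring of the QuestionAnsweringTrainer defined above; model,
# training_args, eval_dataset, eval_examples, tokenizer,
# post_processing_function and compute_metrics are placeholders.
trainer = QuestionAnsweringTrainer(
    model=model,
    args=training_args,
    eval_dataset=eval_dataset,
    eval_examples=eval_examples,
    tokenizer=tokenizer,
    post_process_function=post_processing_function,
    compute_metrics=compute_metrics,
)
metrics = trainer.evaluate()   # metric keys come back prefixed with "eval_"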
+ compute_metrics = self.compute_metrics + self.compute_metrics = None + eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop + try: + output = eval_loop( + eval_dataloader, + description="Evaluation", + # No point gathering the predictions if there are no metrics, otherwise we defer to + # self.args.prediction_loss_only + prediction_loss_only=True if compute_metrics is None else None, + ignore_keys=ignore_keys, + ) + finally: + self.compute_metrics = compute_metrics + + if self.post_process_function is not None and self.compute_metrics is not None: + eval_preds = self.post_process_function(eval_examples, eval_dataset, output.predictions) + metrics = self.compute_metrics(eval_preds) + + # Prefix all keys with metric_key_prefix + '_' + for key in list(metrics.keys()): + if not key.startswith(f"{metric_key_prefix}_"): + metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) + + self.log(metrics) + else: + metrics = {} + + self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, metrics) + return metrics + + def predict(self, predict_dataset, predict_examples, ignore_keys=None, metric_key_prefix: str = "test"): + predict_dataloader = self.get_test_dataloader(predict_dataset) + + # Temporarily disable metric computation, we will do it in the loop here. + compute_metrics = self.compute_metrics + self.compute_metrics = None + eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop + try: + output = eval_loop( + predict_dataloader, + description="Prediction", + # No point gathering the predictions if there are no metrics, otherwise we defer to + # self.args.prediction_loss_only + prediction_loss_only=True if compute_metrics is None else None, + ignore_keys=ignore_keys, + ) + finally: + self.compute_metrics = compute_metrics + + if self.post_process_function is None or self.compute_metrics is None: + return output + + predictions = self.post_process_function(predict_examples, predict_dataset, output.predictions, "predict") + metrics = self.compute_metrics(predictions) + + # Prefix all keys with metric_key_prefix + '_' + for key in list(metrics.keys()): + if not key.startswith(f"{metric_key_prefix}_"): + metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) + return PredictionOutput(predictions=predictions.predictions, label_ids=predictions.label_ids, metrics=metrics) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_qa.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_qa.py similarity index 98% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_qa.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_qa.py index 6006e1f90bb..fb25f04ac1b 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_qa.py +++ 
b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_qa.py @@ -1,428 +1,428 @@ -# coding=utf-8 -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Post-processing utilities for question answering. -""" -import collections -import json -import logging -import os -from typing import Optional, Tuple - -import numpy as np -from tqdm.auto import tqdm - - -logger = logging.getLogger(__name__) - - -def postprocess_qa_predictions( - examples, - features, - predictions: Tuple[np.ndarray, np.ndarray], - version_2_with_negative: bool = False, - n_best_size: int = 20, - max_answer_length: int = 30, - null_score_diff_threshold: float = 0.0, - output_dir: Optional[str] = None, - prefix: Optional[str] = None, - log_level: Optional[int] = logging.WARNING, -): - """ - Post-processes the predictions of a question-answering model to convert them to answers that are substrings of the - original contexts. This is the base postprocessing functions for models that only return start and end logits. - Args: - examples: The non-preprocessed dataset (see the main script for more information). - features: The processed dataset (see the main script for more information). - predictions (:obj:`Tuple[np.ndarray, np.ndarray]`): - The predictions of the model: two arrays containing the start logits and the end logits respectively. Its - first dimension must match the number of elements of :obj:`features`. - version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`): - Whether or not the underlying dataset contains examples with no answers. - n_best_size (:obj:`int`, `optional`, defaults to 20): - The total number of n-best predictions to generate when looking for an answer. - max_answer_length (:obj:`int`, `optional`, defaults to 30): - The maximum length of an answer that can be generated. This is needed because the start and end predictions - are not conditioned on one another. - null_score_diff_threshold (:obj:`float`, `optional`, defaults to 0): - The threshold used to select the null answer: if the best answer has a score that is less than the score of - the null answer minus this threshold, the null answer is selected for this example (note that the score of - the null answer for an example giving several features is the minimum of the scores for the null answer on - each feature: all features must be aligned on the fact they `want` to predict a null answer). - Only useful when :obj:`version_2_with_negative` is :obj:`True`. - output_dir (:obj:`str`, `optional`): - If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if - :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null - answers, are saved in `output_dir`. - prefix (:obj:`str`, `optional`): - If provided, the dictionaries mentioned above are saved with `prefix` added to their names. 
- log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): - ``logging`` log level (e.g., ``logging.WARNING``) - """ - if len(predictions) != 2: - raise ValueError("`predictions` should be a tuple with two elements (start_logits, end_logits).") - all_start_logits, all_end_logits = predictions - - if len(predictions[0]) != len(features): - raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") - - # Build a map example to its corresponding features. - example_id_to_index = {k: i for i, k in enumerate(examples["id"])} - features_per_example = collections.defaultdict(list) - for i, feature in enumerate(features): - features_per_example[example_id_to_index[feature["example_id"]]].append(i) - - # The dictionaries we have to fill. - all_predictions = collections.OrderedDict() - all_nbest_json = collections.OrderedDict() - if version_2_with_negative: - scores_diff_json = collections.OrderedDict() - - # Logging. - logger.setLevel(log_level) - logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") - - # Let's loop over all the examples! - for example_index, example in enumerate(tqdm(examples)): - # Those are the indices of the features associated to the current example. - feature_indices = features_per_example[example_index] - - min_null_prediction = None - prelim_predictions = [] - - # Looping through all the features associated to the current example. - for feature_index in feature_indices: - # We grab the predictions of the model for this feature. - start_logits = all_start_logits[feature_index] - end_logits = all_end_logits[feature_index] - # This is what will allow us to map some the positions in our logits to span of texts in the original - # context. - offset_mapping = features[feature_index]["offset_mapping"] - # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context - # available in the current feature. - token_is_max_context = features[feature_index].get("token_is_max_context", None) - - # Update minimum null prediction. - feature_null_score = start_logits[0] + end_logits[0] - if min_null_prediction is None or min_null_prediction["score"] > feature_null_score: - min_null_prediction = { - "offsets": (0, 0), - "score": feature_null_score, - "start_logit": start_logits[0], - "end_logit": end_logits[0], - } - - # Go through all possibilities for the `n_best_size` greater start and end logits. - start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist() - end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist() - for start_index in start_indexes: - for end_index in end_indexes: - # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond - # to part of the input_ids that are not in the context. - if ( - start_index >= len(offset_mapping) - or end_index >= len(offset_mapping) - or offset_mapping[start_index] is None - or offset_mapping[end_index] is None - ): - continue - # Don't consider answers with a length that is either < 0 or > max_answer_length. - if end_index < start_index or end_index - start_index + 1 > max_answer_length: - continue - # Don't consider answer that don't have the maximum context available (if such information is - # provided). 
- if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): - continue - prelim_predictions.append( - { - "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), - "score": start_logits[start_index] + end_logits[end_index], - "start_logit": start_logits[start_index], - "end_logit": end_logits[end_index], - } - ) - if version_2_with_negative: - # Add the minimum null prediction - prelim_predictions.append(min_null_prediction) - null_score = min_null_prediction["score"] - - # Only keep the best `n_best_size` predictions. - predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] - - # Add back the minimum null prediction if it was removed because of its low score. - if version_2_with_negative and not any(p["offsets"] == (0, 0) for p in predictions): - predictions.append(min_null_prediction) - - # Use the offsets to gather the answer text in the original context. - context = example["context"] - for pred in predictions: - offsets = pred.pop("offsets") - pred["text"] = context[offsets[0] : offsets[1]] - - # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid - # failure. - if len(predictions) == 0 or (len(predictions) == 1 and predictions[0]["text"] == ""): - predictions.insert(0, {"text": "empty", "start_logit": 0.0, "end_logit": 0.0, "score": 0.0}) - - # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using - # the LogSumExp trick). - scores = np.array([pred.pop("score") for pred in predictions]) - exp_scores = np.exp(scores - np.max(scores)) - probs = exp_scores / exp_scores.sum() - - # Include the probabilities in our predictions. - for prob, pred in zip(probs, predictions): - pred["probability"] = prob - - # Pick the best prediction. If the null answer is not possible, this is easy. - if not version_2_with_negative: - all_predictions[example["id"]] = predictions[0]["text"] - else: - # Otherwise we first need to find the best non-empty prediction. - i = 0 - while predictions[i]["text"] == "": - i += 1 - best_non_null_pred = predictions[i] - - # Then we compare to the null prediction using the threshold. - score_diff = null_score - best_non_null_pred["start_logit"] - best_non_null_pred["end_logit"] - scores_diff_json[example["id"]] = float(score_diff) # To be JSON-serializable. - if score_diff > null_score_diff_threshold: - all_predictions[example["id"]] = "" - else: - all_predictions[example["id"]] = best_non_null_pred["text"] - - # Make `predictions` JSON-serializable by casting np.float back to float. - all_nbest_json[example["id"]] = [ - {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} - for pred in predictions - ] - - # If we have an output_dir, let's save all those dicts. 
- if output_dir is not None: - if not os.path.isdir(output_dir): - raise EnvironmentError(f"{output_dir} is not a directory.") - - prediction_file = os.path.join( - output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" - ) - nbest_file = os.path.join( - output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json" - ) - if version_2_with_negative: - null_odds_file = os.path.join( - output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json" - ) - - logger.info(f"Saving predictions to {prediction_file}.") - with open(prediction_file, "w") as writer: - writer.write(json.dumps(all_predictions, indent=4) + "\n") - logger.info(f"Saving nbest_preds to {nbest_file}.") - with open(nbest_file, "w") as writer: - writer.write(json.dumps(all_nbest_json, indent=4) + "\n") - if version_2_with_negative: - logger.info(f"Saving null_odds to {null_odds_file}.") - with open(null_odds_file, "w") as writer: - writer.write(json.dumps(scores_diff_json, indent=4) + "\n") - - return all_predictions - - -def postprocess_qa_predictions_with_beam_search( - examples, - features, - predictions: Tuple[np.ndarray, np.ndarray], - version_2_with_negative: bool = False, - n_best_size: int = 20, - max_answer_length: int = 30, - start_n_top: int = 5, - end_n_top: int = 5, - output_dir: Optional[str] = None, - prefix: Optional[str] = None, - log_level: Optional[int] = logging.WARNING, -): - """ - Post-processes the predictions of a question-answering model with beam search to convert them to answers that are substrings of the - original contexts. This is the postprocessing functions for models that return start and end logits, indices, as well as - cls token predictions. - Args: - examples: The non-preprocessed dataset (see the main script for more information). - features: The processed dataset (see the main script for more information). - predictions (:obj:`Tuple[np.ndarray, np.ndarray]`): - The predictions of the model: two arrays containing the start logits and the end logits respectively. Its - first dimension must match the number of elements of :obj:`features`. - version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`): - Whether or not the underlying dataset contains examples with no answers. - n_best_size (:obj:`int`, `optional`, defaults to 20): - The total number of n-best predictions to generate when looking for an answer. - max_answer_length (:obj:`int`, `optional`, defaults to 30): - The maximum length of an answer that can be generated. This is needed because the start and end predictions - are not conditioned on one another. - start_n_top (:obj:`int`, `optional`, defaults to 5): - The number of top start logits too keep when searching for the :obj:`n_best_size` predictions. - end_n_top (:obj:`int`, `optional`, defaults to 5): - The number of top end logits too keep when searching for the :obj:`n_best_size` predictions. - output_dir (:obj:`str`, `optional`): - If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if - :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null - answers, are saved in `output_dir`. - prefix (:obj:`str`, `optional`): - If provided, the dictionaries mentioned above are saved with `prefix` added to their names. 
- log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): - ``logging`` log level (e.g., ``logging.WARNING``) - """ - if len(predictions) != 5: - raise ValueError("`predictions` should be a tuple with five elements.") - start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = predictions - - if len(predictions[0]) != len(features): - raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") - - # Build a map example to its corresponding features. - example_id_to_index = {k: i for i, k in enumerate(examples["id"])} - features_per_example = collections.defaultdict(list) - for i, feature in enumerate(features): - features_per_example[example_id_to_index[feature["example_id"]]].append(i) - - # The dictionaries we have to fill. - all_predictions = collections.OrderedDict() - all_nbest_json = collections.OrderedDict() - scores_diff_json = collections.OrderedDict() if version_2_with_negative else None - - # Logging. - logger.setLevel(log_level) - logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") - - # Let's loop over all the examples! - for example_index, example in enumerate(tqdm(examples)): - # Those are the indices of the features associated to the current example. - feature_indices = features_per_example[example_index] - - min_null_score = None - prelim_predictions = [] - - # Looping through all the features associated to the current example. - for feature_index in feature_indices: - # We grab the predictions of the model for this feature. - start_log_prob = start_top_log_probs[feature_index] - start_indexes = start_top_index[feature_index] - end_log_prob = end_top_log_probs[feature_index] - end_indexes = end_top_index[feature_index] - feature_null_score = cls_logits[feature_index] - # This is what will allow us to map some the positions in our logits to span of texts in the original - # context. - offset_mapping = features[feature_index]["offset_mapping"] - # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context - # available in the current feature. - token_is_max_context = features[feature_index].get("token_is_max_context", None) - - # Update minimum null prediction - if min_null_score is None or feature_null_score < min_null_score: - min_null_score = feature_null_score - - # Go through all possibilities for the `n_start_top`/`n_end_top` greater start and end logits. - for i in range(start_n_top): - for j in range(end_n_top): - start_index = int(start_indexes[i]) - j_index = i * end_n_top + j - end_index = int(end_indexes[j_index]) - # Don't consider out-of-scope answers (last part of the test should be unnecessary because of the - # p_mask but let's not take any risk) - if ( - start_index >= len(offset_mapping) - or end_index >= len(offset_mapping) - or offset_mapping[start_index] is None - or offset_mapping[end_index] is None - ): - continue - # Don't consider answers with a length negative or > max_answer_length. - if end_index < start_index or end_index - start_index + 1 > max_answer_length: - continue - # Don't consider answer that don't have the maximum context available (if such information is - # provided). 
- if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): - continue - prelim_predictions.append( - { - "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), - "score": start_log_prob[i] + end_log_prob[j_index], - "start_log_prob": start_log_prob[i], - "end_log_prob": end_log_prob[j_index], - } - ) - - # Only keep the best `n_best_size` predictions. - predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] - - # Use the offsets to gather the answer text in the original context. - context = example["context"] - for pred in predictions: - offsets = pred.pop("offsets") - pred["text"] = context[offsets[0] : offsets[1]] - - # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid - # failure. - if len(predictions) == 0: - predictions.insert(0, {"text": "", "start_logit": -1e-6, "end_logit": -1e-6, "score": -2e-6}) - - # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using - # the LogSumExp trick). - scores = np.array([pred.pop("score") for pred in predictions]) - exp_scores = np.exp(scores - np.max(scores)) - probs = exp_scores / exp_scores.sum() - - # Include the probabilities in our predictions. - for prob, pred in zip(probs, predictions): - pred["probability"] = prob - - # Pick the best prediction and set the probability for the null answer. - all_predictions[example["id"]] = predictions[0]["text"] - if version_2_with_negative: - scores_diff_json[example["id"]] = float(min_null_score) - - # Make `predictions` JSON-serializable by casting np.float back to float. - all_nbest_json[example["id"]] = [ - {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} - for pred in predictions - ] - - # If we have an output_dir, let's save all those dicts. - if output_dir is not None: - if not os.path.isdir(output_dir): - raise EnvironmentError(f"{output_dir} is not a directory.") - - prediction_file = os.path.join( - output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" - ) - nbest_file = os.path.join( - output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json" - ) - if version_2_with_negative: - null_odds_file = os.path.join( - output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json" - ) - - logger.info(f"Saving predictions to {prediction_file}.") - with open(prediction_file, "w") as writer: - writer.write(json.dumps(all_predictions, indent=4) + "\n") - logger.info(f"Saving nbest_preds to {nbest_file}.") - with open(nbest_file, "w") as writer: - writer.write(json.dumps(all_nbest_json, indent=4) + "\n") - if version_2_with_negative: - logger.info(f"Saving null_odds to {null_odds_file}.") - with open(null_odds_file, "w") as writer: - writer.write(json.dumps(scores_diff_json, indent=4) + "\n") - +# coding=utf-8 +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +""" +Post-processing utilities for question answering. +""" +import collections +import json +import logging +import os +from typing import Optional, Tuple + +import numpy as np +from tqdm.auto import tqdm + + +logger = logging.getLogger(__name__) + + +def postprocess_qa_predictions( + examples, + features, + predictions: Tuple[np.ndarray, np.ndarray], + version_2_with_negative: bool = False, + n_best_size: int = 20, + max_answer_length: int = 30, + null_score_diff_threshold: float = 0.0, + output_dir: Optional[str] = None, + prefix: Optional[str] = None, + log_level: Optional[int] = logging.WARNING, +): + """ + Post-processes the predictions of a question-answering model to convert them to answers that are substrings of the + original contexts. This is the base postprocessing functions for models that only return start and end logits. + Args: + examples: The non-preprocessed dataset (see the main script for more information). + features: The processed dataset (see the main script for more information). + predictions (:obj:`Tuple[np.ndarray, np.ndarray]`): + The predictions of the model: two arrays containing the start logits and the end logits respectively. Its + first dimension must match the number of elements of :obj:`features`. + version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not the underlying dataset contains examples with no answers. + n_best_size (:obj:`int`, `optional`, defaults to 20): + The total number of n-best predictions to generate when looking for an answer. + max_answer_length (:obj:`int`, `optional`, defaults to 30): + The maximum length of an answer that can be generated. This is needed because the start and end predictions + are not conditioned on one another. + null_score_diff_threshold (:obj:`float`, `optional`, defaults to 0): + The threshold used to select the null answer: if the best answer has a score that is less than the score of + the null answer minus this threshold, the null answer is selected for this example (note that the score of + the null answer for an example giving several features is the minimum of the scores for the null answer on + each feature: all features must be aligned on the fact they `want` to predict a null answer). + Only useful when :obj:`version_2_with_negative` is :obj:`True`. + output_dir (:obj:`str`, `optional`): + If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if + :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null + answers, are saved in `output_dir`. + prefix (:obj:`str`, `optional`): + If provided, the dictionaries mentioned above are saved with `prefix` added to their names. + log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): + ``logging`` log level (e.g., ``logging.WARNING``) + """ + if len(predictions) != 2: + raise ValueError("`predictions` should be a tuple with two elements (start_logits, end_logits).") + all_start_logits, all_end_logits = predictions + + if len(predictions[0]) != len(features): + raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") + + # Build a map example to its corresponding features. 
+ example_id_to_index = {k: i for i, k in enumerate(examples["id"])} + features_per_example = collections.defaultdict(list) + for i, feature in enumerate(features): + features_per_example[example_id_to_index[feature["example_id"]]].append(i) + + # The dictionaries we have to fill. + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + if version_2_with_negative: + scores_diff_json = collections.OrderedDict() + + # Logging. + logger.setLevel(log_level) + logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") + + # Let's loop over all the examples! + for example_index, example in enumerate(tqdm(examples)): + # Those are the indices of the features associated to the current example. + feature_indices = features_per_example[example_index] + + min_null_prediction = None + prelim_predictions = [] + + # Looping through all the features associated to the current example. + for feature_index in feature_indices: + # We grab the predictions of the model for this feature. + start_logits = all_start_logits[feature_index] + end_logits = all_end_logits[feature_index] + # This is what will allow us to map some the positions in our logits to span of texts in the original + # context. + offset_mapping = features[feature_index]["offset_mapping"] + # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context + # available in the current feature. + token_is_max_context = features[feature_index].get("token_is_max_context", None) + + # Update minimum null prediction. + feature_null_score = start_logits[0] + end_logits[0] + if min_null_prediction is None or min_null_prediction["score"] > feature_null_score: + min_null_prediction = { + "offsets": (0, 0), + "score": feature_null_score, + "start_logit": start_logits[0], + "end_logit": end_logits[0], + } + + # Go through all possibilities for the `n_best_size` greater start and end logits. + start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist() + end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist() + for start_index in start_indexes: + for end_index in end_indexes: + # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond + # to part of the input_ids that are not in the context. + if ( + start_index >= len(offset_mapping) + or end_index >= len(offset_mapping) + or offset_mapping[start_index] is None + or offset_mapping[end_index] is None + ): + continue + # Don't consider answers with a length that is either < 0 or > max_answer_length. + if end_index < start_index or end_index - start_index + 1 > max_answer_length: + continue + # Don't consider answer that don't have the maximum context available (if such information is + # provided). + if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): + continue + prelim_predictions.append( + { + "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), + "score": start_logits[start_index] + end_logits[end_index], + "start_logit": start_logits[start_index], + "end_logit": end_logits[end_index], + } + ) + if version_2_with_negative: + # Add the minimum null prediction + prelim_predictions.append(min_null_prediction) + null_score = min_null_prediction["score"] + + # Only keep the best `n_best_size` predictions. 
+ predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] + + # Add back the minimum null prediction if it was removed because of its low score. + if version_2_with_negative and not any(p["offsets"] == (0, 0) for p in predictions): + predictions.append(min_null_prediction) + + # Use the offsets to gather the answer text in the original context. + context = example["context"] + for pred in predictions: + offsets = pred.pop("offsets") + pred["text"] = context[offsets[0] : offsets[1]] + + # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid + # failure. + if len(predictions) == 0 or (len(predictions) == 1 and predictions[0]["text"] == ""): + predictions.insert(0, {"text": "empty", "start_logit": 0.0, "end_logit": 0.0, "score": 0.0}) + + # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using + # the LogSumExp trick). + scores = np.array([pred.pop("score") for pred in predictions]) + exp_scores = np.exp(scores - np.max(scores)) + probs = exp_scores / exp_scores.sum() + + # Include the probabilities in our predictions. + for prob, pred in zip(probs, predictions): + pred["probability"] = prob + + # Pick the best prediction. If the null answer is not possible, this is easy. + if not version_2_with_negative: + all_predictions[example["id"]] = predictions[0]["text"] + else: + # Otherwise we first need to find the best non-empty prediction. + i = 0 + while predictions[i]["text"] == "": + i += 1 + best_non_null_pred = predictions[i] + + # Then we compare to the null prediction using the threshold. + score_diff = null_score - best_non_null_pred["start_logit"] - best_non_null_pred["end_logit"] + scores_diff_json[example["id"]] = float(score_diff) # To be JSON-serializable. + if score_diff > null_score_diff_threshold: + all_predictions[example["id"]] = "" + else: + all_predictions[example["id"]] = best_non_null_pred["text"] + + # Make `predictions` JSON-serializable by casting np.float back to float. + all_nbest_json[example["id"]] = [ + {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} + for pred in predictions + ] + + # If we have an output_dir, let's save all those dicts. 
+ if output_dir is not None: + if not os.path.isdir(output_dir): + raise EnvironmentError(f"{output_dir} is not a directory.") + + prediction_file = os.path.join( + output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" + ) + nbest_file = os.path.join( + output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json" + ) + if version_2_with_negative: + null_odds_file = os.path.join( + output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json" + ) + + logger.info(f"Saving predictions to {prediction_file}.") + with open(prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + logger.info(f"Saving nbest_preds to {nbest_file}.") + with open(nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + if version_2_with_negative: + logger.info(f"Saving null_odds to {null_odds_file}.") + with open(null_odds_file, "w") as writer: + writer.write(json.dumps(scores_diff_json, indent=4) + "\n") + + return all_predictions + + +def postprocess_qa_predictions_with_beam_search( + examples, + features, + predictions: Tuple[np.ndarray, np.ndarray], + version_2_with_negative: bool = False, + n_best_size: int = 20, + max_answer_length: int = 30, + start_n_top: int = 5, + end_n_top: int = 5, + output_dir: Optional[str] = None, + prefix: Optional[str] = None, + log_level: Optional[int] = logging.WARNING, +): + """ + Post-processes the predictions of a question-answering model with beam search to convert them to answers that are substrings of the + original contexts. This is the postprocessing functions for models that return start and end logits, indices, as well as + cls token predictions. + Args: + examples: The non-preprocessed dataset (see the main script for more information). + features: The processed dataset (see the main script for more information). + predictions (:obj:`Tuple[np.ndarray, np.ndarray]`): + The predictions of the model: two arrays containing the start logits and the end logits respectively. Its + first dimension must match the number of elements of :obj:`features`. + version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not the underlying dataset contains examples with no answers. + n_best_size (:obj:`int`, `optional`, defaults to 20): + The total number of n-best predictions to generate when looking for an answer. + max_answer_length (:obj:`int`, `optional`, defaults to 30): + The maximum length of an answer that can be generated. This is needed because the start and end predictions + are not conditioned on one another. + start_n_top (:obj:`int`, `optional`, defaults to 5): + The number of top start logits too keep when searching for the :obj:`n_best_size` predictions. + end_n_top (:obj:`int`, `optional`, defaults to 5): + The number of top end logits too keep when searching for the :obj:`n_best_size` predictions. + output_dir (:obj:`str`, `optional`): + If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if + :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null + answers, are saved in `output_dir`. + prefix (:obj:`str`, `optional`): + If provided, the dictionaries mentioned above are saved with `prefix` added to their names. 
+ log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): + ``logging`` log level (e.g., ``logging.WARNING``) + """ + if len(predictions) != 5: + raise ValueError("`predictions` should be a tuple with five elements.") + start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = predictions + + if len(predictions[0]) != len(features): + raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") + + # Build a map example to its corresponding features. + example_id_to_index = {k: i for i, k in enumerate(examples["id"])} + features_per_example = collections.defaultdict(list) + for i, feature in enumerate(features): + features_per_example[example_id_to_index[feature["example_id"]]].append(i) + + # The dictionaries we have to fill. + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() if version_2_with_negative else None + + # Logging. + logger.setLevel(log_level) + logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") + + # Let's loop over all the examples! + for example_index, example in enumerate(tqdm(examples)): + # Those are the indices of the features associated to the current example. + feature_indices = features_per_example[example_index] + + min_null_score = None + prelim_predictions = [] + + # Looping through all the features associated to the current example. + for feature_index in feature_indices: + # We grab the predictions of the model for this feature. + start_log_prob = start_top_log_probs[feature_index] + start_indexes = start_top_index[feature_index] + end_log_prob = end_top_log_probs[feature_index] + end_indexes = end_top_index[feature_index] + feature_null_score = cls_logits[feature_index] + # This is what will allow us to map some the positions in our logits to span of texts in the original + # context. + offset_mapping = features[feature_index]["offset_mapping"] + # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context + # available in the current feature. + token_is_max_context = features[feature_index].get("token_is_max_context", None) + + # Update minimum null prediction + if min_null_score is None or feature_null_score < min_null_score: + min_null_score = feature_null_score + + # Go through all possibilities for the `n_start_top`/`n_end_top` greater start and end logits. + for i in range(start_n_top): + for j in range(end_n_top): + start_index = int(start_indexes[i]) + j_index = i * end_n_top + j + end_index = int(end_indexes[j_index]) + # Don't consider out-of-scope answers (last part of the test should be unnecessary because of the + # p_mask but let's not take any risk) + if ( + start_index >= len(offset_mapping) + or end_index >= len(offset_mapping) + or offset_mapping[start_index] is None + or offset_mapping[end_index] is None + ): + continue + # Don't consider answers with a length negative or > max_answer_length. + if end_index < start_index or end_index - start_index + 1 > max_answer_length: + continue + # Don't consider answer that don't have the maximum context available (if such information is + # provided). 
+ if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): + continue + prelim_predictions.append( + { + "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), + "score": start_log_prob[i] + end_log_prob[j_index], + "start_log_prob": start_log_prob[i], + "end_log_prob": end_log_prob[j_index], + } + ) + + # Only keep the best `n_best_size` predictions. + predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] + + # Use the offsets to gather the answer text in the original context. + context = example["context"] + for pred in predictions: + offsets = pred.pop("offsets") + pred["text"] = context[offsets[0] : offsets[1]] + + # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid + # failure. + if len(predictions) == 0: + predictions.insert(0, {"text": "", "start_logit": -1e-6, "end_logit": -1e-6, "score": -2e-6}) + + # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using + # the LogSumExp trick). + scores = np.array([pred.pop("score") for pred in predictions]) + exp_scores = np.exp(scores - np.max(scores)) + probs = exp_scores / exp_scores.sum() + + # Include the probabilities in our predictions. + for prob, pred in zip(probs, predictions): + pred["probability"] = prob + + # Pick the best prediction and set the probability for the null answer. + all_predictions[example["id"]] = predictions[0]["text"] + if version_2_with_negative: + scores_diff_json[example["id"]] = float(min_null_score) + + # Make `predictions` JSON-serializable by casting np.float back to float. + all_nbest_json[example["id"]] = [ + {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} + for pred in predictions + ] + + # If we have an output_dir, let's save all those dicts. 
+ if output_dir is not None: + if not os.path.isdir(output_dir): + raise EnvironmentError(f"{output_dir} is not a directory.") + + prediction_file = os.path.join( + output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" + ) + nbest_file = os.path.join( + output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json" + ) + if version_2_with_negative: + null_odds_file = os.path.join( + output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json" + ) + + logger.info(f"Saving predictions to {prediction_file}.") + with open(prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + logger.info(f"Saving nbest_preds to {nbest_file}.") + with open(nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + if version_2_with_negative: + logger.info(f"Saving null_odds to {null_odds_file}.") + with open(null_odds_file, "w") as writer: + writer.write(json.dumps(scores_diff_json, indent=4) + "\n") + return all_predictions, scores_diff_json \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py index 73e977c9559..8a1c5285eea 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/main.py @@ -1,524 +1,524 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2022 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Fine-tuning the library models for question answering. -""" -# You can also adapt this script on your own question answering task. Pointers for this are left as comments. 
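For orientation while reviewing the relocated `utils_qa.py` above, here is a minimal usage sketch (illustrative only, not part of this patch); it assumes SQuAD-style `eval_examples`/`eval_dataset` objects such as those produced by the preprocessing in `main.py` below, and `start_logits`/`end_logits` numpy arrays whose first dimension matches `len(eval_dataset)`:

    import numpy as np
    from utils_qa import postprocess_qa_predictions

    # predictions is the (start_logits, end_logits) pair returned by the model;
    # the helper maps the logits back to answer substrings of each original context.
    answers = postprocess_qa_predictions(
        examples=eval_examples,        # raw validation split (assumed to exist)
        features=eval_dataset,         # tokenized features with offset_mapping (assumed)
        predictions=(start_logits, end_logits),
        n_best_size=20,
        max_answer_length=30,
        output_dir="./eval_output",    # hypothetical output directory
        prefix="eval",
    )
    # answers maps each example id to its predicted answer text.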
- -import numpy as np -import logging -import os -from typing import List, Optional -import onnx -import sys -from dataclasses import dataclass, field -from functools import partial -from typing import Optional - -import datasets -import transformers -from datasets import load_dataset -from transformers import AutoTokenizer, EvalPrediction, HfArgumentParser, PreTrainedTokenizer, TrainingArguments -from transformers.utils import check_min_version -from transformers.utils.versions import require_version -import onnxruntime - -from evaluate import load -from utils_model import ORTModel -from utils_qa import postprocess_qa_predictions -from neural_compressor.data import DataLoader - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.15.0") - -require_version( - "datasets>=1.8.0", "To fix: pip install -r examples/onnxruntime/optimization/question-answering/requirements.txt" -) - -logger = logging.getLogger(__name__) - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - input_model: str = field( - metadata={"help": "Path to onnx model"} - ) - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Path to directory to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." - }, - ) - tune: bool = field( - default=False, - metadata={"help": ("INC tune")}, - ) - benchmark: bool = field( - default=False, - metadata={"help": ("INC benchmark")}, - ) - mode: str = field( - default='performance', - metadata={"help": ("INC benchmark mode")}, - ) - save_path: str = field( - default=None, - metadata={"help": ("onnx int8 model path")}, - ) - num_heads: int = field( - default=12, - metadata={"help": ("onnx model optimize num_heads")}, - ) - hidden_size: int = field( - default=768, - metadata={"help": ("onnx model optimize hidden_size")}, - ) - batch_size: int = field( - default=1, - metadata={"help": ("batch size for benchmark")}, - ) - quant_format: str = field( - default="QOperator", - metadata={"help": ("quant format")}, - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. 
- """ - - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."}) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to evaluate the perplexity on (a text file)."}, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - max_seq_length: int = field( - default=512, - metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - }, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " - "be faster on GPU but will be slower on TPU)." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - }, - ) - max_predict_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." - }, - ) - version_2_with_negative: bool = field( - default=False, metadata={"help": "If true, some of the examples do not have an answer."} - ) - null_score_diff_threshold: float = field( - default=0.0, - metadata={ - "help": "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." - }, - ) - doc_stride: int = field( - default=256, - metadata={"help": "When splitting up a long document into chunks, how much stride to take between chunks."}, - ) - n_best_size: int = field( - default=20, - metadata={"help": "The total number of n-best predictions to generate when looking for an answer."}, - ) - max_answer_length: int = field( - default=30, - metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." 
- }, - ) - - def __post_init__(self): - if ( - self.dataset_name is None - and self.train_file is None - and self.validation_file is None - and self.test_file is None - ): - raise ValueError("Need either a dataset name or a training/validation file/test_file.") - else: - if self.train_file is not None: - extension = self.train_file.split(".")[-1] - assert extension in ["csv", "json"], "`train_file` should be a csv or a json file." - if self.validation_file is not None: - extension = self.validation_file.split(".")[-1] - assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." - if self.test_file is not None: - extension = self.test_file.split(".")[-1] - assert extension in ["csv", "json"], "`test_file` should be a csv or a json file." - -class SQuADDataset(): - def __init__( - self, - dataset, - model, - label_names: Optional[List[str]] = None, - ): - self.dataset = dataset - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self._process_dataset() - - def _process_dataset(self): - self.label = [] - self.onnx_inputs = [] - for inputs in self.dataset: - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - self.label.append(labels) - self.onnx_inputs.append([np.array(inputs[key]) for key in self.onnx_input_names if key in inputs]) - - def __getitem__(self, index): - return tuple(self.onnx_inputs[index]), self.label[index] - - def __len__(self): - assert len(self.label) == len(self.onnx_inputs) - return len(self.onnx_inputs) - -def main(): - # We now keep distinct sets of args, for a cleaner separation of concerns. - parser = HfArgumentParser( - (ModelArguments, DataTrainingArguments, TrainingArguments) - ) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args, training_args = parser.parse_json_file( - json_file=os.path.abspath(sys.argv[1]) - ) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - datasets.utils.logging.set_verbosity(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - - if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." 
- ) - - os.makedirs(training_args.output_dir, exist_ok=True) - - tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path) - - # Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation step(s) - # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) - # or just provide the name of one of the public datasets available on the hub at - # https://huggingface.co/datasets/ (the dataset will be downloaded automatically from the datasets Hub). - # - # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called - # 'text' is found. You can easily tweak this behavior (see below). - # - # In distributed training, the load_dataset function guarantee that only one local process can concurrently - # download the dataset. - if data_args.dataset_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset( - data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir - ) - else: - data_files = {} - if data_args.train_file is not None: - data_files["train"] = data_args.train_file - extension = data_args.train_file.split(".")[-1] - if data_args.validation_file is not None: - data_files["validation"] = data_args.validation_file - extension = data_args.validation_file.split(".")[-1] - if data_args.test_file is not None: - data_files["test"] = data_args.test_file - extension = data_args.test_file.split(".")[-1] - raw_datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) - # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) - # at https://huggingface.co/docs/datasets/loading_datasets.html. - - # Preprocessing the datasets. - column_names = raw_datasets["validation"].column_names - - question_column_name = "question" if "question" in column_names else column_names[0] - context_column_name = "context" if "context" in column_names else column_names[1] - answer_column_name = "answers" if "answers" in column_names else column_names[2] - - # Validation preprocessing - def prepare_validation_features(examples, tokenizer: PreTrainedTokenizer): - # Some of the questions have lots of whitespace on the left, which is not useful and will make the - # truncation of the context fail (the tokenized question will take a lots of space). So we remove that - # left whitespace - examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] - - # Padding side determines if we do (question|context) or (context|question). - pad_on_right = tokenizer.padding_side == "right" - - # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This - # results in one example possible giving several features when a context is long, each of those features - # having a context that overlaps a bit the context of the previous feature. 
- tokenized_examples = tokenizer( - examples[question_column_name if pad_on_right else context_column_name], - examples[context_column_name if pad_on_right else question_column_name], - truncation="only_second" if pad_on_right else "only_first", - max_length=min(data_args.max_seq_length, tokenizer.model_max_length), - stride=data_args.doc_stride, - return_overflowing_tokens=True, - return_offsets_mapping=True, - padding="max_length" if data_args.pad_to_max_length else False, - ) - - # Since one example might give us several features if it has a long context, we need a map from a feature to - # its corresponding example. This key gives us just that. - sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") - - # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the - # corresponding example_id and we will store the offset mappings. - tokenized_examples["example_id"] = [] - - for i in range(len(tokenized_examples["input_ids"])): - # Grab the sequence corresponding to that example (to know what is the context and what is the question) - sequence_ids = tokenized_examples.sequence_ids(i) - context_index = 1 if pad_on_right else 0 - - # One example can give several spans, this is the index of the example containing this span of text. - sample_index = sample_mapping[i] - tokenized_examples["example_id"].append(examples["id"][sample_index]) - - # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token - # position is part of the context or not. - tokenized_examples["offset_mapping"][i] = [ - (o if sequence_ids[k] == context_index else None) - for k, o in enumerate(tokenized_examples["offset_mapping"][i]) - ] - - return tokenized_examples - - # Preprocess the evaluation dataset - if "validation" not in raw_datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_examples = raw_datasets["validation"] - if data_args.max_eval_samples is not None: - eval_examples = eval_examples.select(range(data_args.max_eval_samples)) - eval_dataset = eval_examples.map( - partial(prepare_validation_features, tokenizer=tokenizer), - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on validation dataset", - ) - if data_args.max_eval_samples is not None: - # During Feature creation dataset samples might increase, we will select required samples again - eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) - - # Post-processing: - def post_processing_function(examples, features, predictions, stage="eval"): - # Post-processing: we match the start logits and end logits to answers in the original context. - predictions = postprocess_qa_predictions( - examples=examples, - features=features, - predictions=predictions, - version_2_with_negative=data_args.version_2_with_negative, - n_best_size=data_args.n_best_size, - max_answer_length=data_args.max_answer_length, - null_score_diff_threshold=data_args.null_score_diff_threshold, - output_dir=training_args.output_dir, - log_level=log_level, - prefix=stage, - ) - # Format the result to the format the metric expects. 
- if data_args.version_2_with_negative: - formatted_predictions = [ - {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items() - ] - else: - formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()] - - references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples] - return EvalPrediction(predictions=formatted_predictions, label_ids=references) - - metric = load("squad_v2" if data_args.version_2_with_negative else "squad") - - def compute_metrics(p: EvalPrediction): - return metric.compute(predictions=p.predictions, references=p.label_ids) - - - def eval_func(model, *args): - # Evaluation - logger.info("*** Evaluate ***") - - ort_model = ORTModel( - model, - compute_metrics=compute_metrics, - label_names=["start_positions", "end_positions"], - ) - outputs = ort_model.evaluation_loop(eval_dataset) - predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions) - metrics = compute_metrics(predictions) - return metrics['f1'] - - if model_args.tune: - # optimize model - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - opt_options = FusionOptions('bert') - opt_options.enable_embed_layer_norm = False - - model_optimizer = optimizer.optimize_model( - model_args.input_model, - 'bert', - num_heads=model_args.num_heads, - hidden_size=model_args.hidden_size, - optimization_options=opt_options) - model = model_optimizer.model - - # check the optimized model is valid - try: - onnxruntime.InferenceSession(model.SerializeToString(), providers=onnxruntime.get_available_providers()) - except Exception as e: - logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. 
" \ - "Try to upgrade onnxruntime to avoid this error") - model = onnx.load(model_args.input_model) - - from neural_compressor import quantization, PostTrainingQuantConfig - from neural_compressor.utils.constant import FP32 - calib_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) - fp32_op_names = None - if model_args.model_name_or_path == 'mrm8488/spanbert-finetuned-squadv1': - fp32_op_names = ['/bert/embeddings/word_embeddings/Gather', - '/bert/encoder/layer.[5-7|9]/output/dense/MatMul'] - elif model_args.model_name_or_path == 'salti/bert-base-multilingual-cased-finetuned-squad': - fp32_op_names = ['/bert/encoder/layer.[4-5]/output/dense/MatMul'] - elif model_args.model_name_or_path == 'distilbert-base-uncased-distilled-squad': - fp32_op_names = ['/distilbert/transformer/layer.[1-5]/ffn/lin[1-2]/MatMul'] - elif model_args.model_name_or_path == 'deepset/roberta-large-squad2': - fp32_op_names = ['/roberta/encoder/layer.\d+/intermediate/dense/MatMul', - '/roberta/encoder/layer.\d+/output/dense/MatMul'] - config = PostTrainingQuantConfig(approach='static', - quant_format=model_args.quant_format, - op_name_dict={op_name:FP32 for op_name in fp32_op_names} \ - if fp32_op_names else None,) - q_model = quantization.fit(model, - config, - eval_func=eval_func, - calib_dataloader=DataLoader(framework='onnxruntime', - dataset=calib_dataset, - batch_size=model_args.batch_size) - ) - q_model.save(model_args.save_path) - - if model_args.benchmark: - model = onnx.load(model_args.input_model) - if model_args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - b_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) - conf = BenchmarkConfig(iteration=100, - cores_per_instance=28, - num_of_instance=1) - b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) - fit(model, conf, b_dataloader=b_dataloader) - elif model_args.mode == 'accuracy': - eval_f1 = eval_func(model) - print("Batch size = %d" % model_args.batch_size) - print("Accuracy: %.5f" % eval_f1) - - -if __name__ == "__main__": - main() +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the library models for question answering. +""" +# You can also adapt this script on your own question answering task. Pointers for this are left as comments. 
+ +import numpy as np +import logging +import os +from typing import List, Optional +import onnx +import sys +from dataclasses import dataclass, field +from functools import partial +from typing import Optional + +import datasets +import transformers +from datasets import load_dataset +from transformers import AutoTokenizer, EvalPrediction, HfArgumentParser, PreTrainedTokenizer, TrainingArguments +from transformers.utils import check_min_version +from transformers.utils.versions import require_version +import onnxruntime + +from evaluate import load +from utils_model import ORTModel +from utils_qa import postprocess_qa_predictions +from neural_compressor.data import DataLoader + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.15.0") + +require_version( + "datasets>=1.8.0", "To fix: pip install -r examples/onnxruntime/optimization/question-answering/requirements.txt" +) + +logger = logging.getLogger(__name__) + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + input_model: str = field( + metadata={"help": "Path to onnx model"} + ) + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Path to directory to store the pretrained models downloaded from huggingface.co"}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + tune: bool = field( + default=False, + metadata={"help": ("INC tune")}, + ) + benchmark: bool = field( + default=False, + metadata={"help": ("INC benchmark")}, + ) + mode: str = field( + default='performance', + metadata={"help": ("INC benchmark mode")}, + ) + save_path: str = field( + default=None, + metadata={"help": ("onnx int8 model path")}, + ) + num_heads: int = field( + default=12, + metadata={"help": ("onnx model optimize num_heads")}, + ) + hidden_size: int = field( + default=768, + metadata={"help": ("onnx model optimize hidden_size")}, + ) + batch_size: int = field( + default=1, + metadata={"help": ("batch size for benchmark")}, + ) + quant_format: str = field( + default="QOperator", + metadata={"help": ("quant format")}, + ) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."}) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to evaluate the perplexity on (a text file)."}, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_seq_length: int = field( + default=512, + metadata={ + "help": "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + }, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " + "be faster on GPU but will be slower on TPU)." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_predict_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + }, + ) + version_2_with_negative: bool = field( + default=False, metadata={"help": "If true, some of the examples do not have an answer."} + ) + null_score_diff_threshold: float = field( + default=0.0, + metadata={ + "help": "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + }, + ) + doc_stride: int = field( + default=256, + metadata={"help": "When splitting up a long document into chunks, how much stride to take between chunks."}, + ) + n_best_size: int = field( + default=20, + metadata={"help": "The total number of n-best predictions to generate when looking for an answer."}, + ) + max_answer_length: int = field( + default=30, + metadata={ + "help": "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." 
+ }, + ) + + def __post_init__(self): + if ( + self.dataset_name is None + and self.train_file is None + and self.validation_file is None + and self.test_file is None + ): + raise ValueError("Need either a dataset name or a training/validation file/test_file.") + else: + if self.train_file is not None: + extension = self.train_file.split(".")[-1] + assert extension in ["csv", "json"], "`train_file` should be a csv or a json file." + if self.validation_file is not None: + extension = self.validation_file.split(".")[-1] + assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." + if self.test_file is not None: + extension = self.test_file.split(".")[-1] + assert extension in ["csv", "json"], "`test_file` should be a csv or a json file." + +class SQuADDataset(): + def __init__( + self, + dataset, + model, + label_names: Optional[List[str]] = None, + ): + self.dataset = dataset + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + self._process_dataset() + + def _process_dataset(self): + self.label = [] + self.onnx_inputs = [] + for inputs in self.dataset: + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + self.label.append(labels) + self.onnx_inputs.append([np.array(inputs[key]) for key in self.onnx_input_names if key in inputs]) + + def __getitem__(self, index): + return tuple(self.onnx_inputs[index]), self.label[index] + + def __len__(self): + assert len(self.label) == len(self.onnx_inputs) + return len(self.onnx_inputs) + +def main(): + # We now keep distinct sets of args, for a cleaner separation of concerns. + parser = HfArgumentParser( + (ModelArguments, DataTrainingArguments, TrainingArguments) + ) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file( + json_file=os.path.abspath(sys.argv[1]) + ) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + + + if os.path.isdir(training_args.output_dir) and not training_args.overwrite_output_dir: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." 
+ ) + + os.makedirs(training_args.output_dir, exist_ok=True) + + tokenizer = AutoTokenizer.from_pretrained(model_args.tokenizer_name or model_args.model_name_or_path) + + # Prepare the dataset downloading, preprocessing and metric creation to perform the evaluation step(s) + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) + # or just provide the name of one of the public datasets available on the hub at + # https://huggingface.co/datasets/ (the dataset will be downloaded automatically from the datasets Hub). + # + # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called + # 'text' is found. You can easily tweak this behavior (see below). + # + # In distributed training, the load_dataset function guarantee that only one local process can concurrently + # download the dataset. + if data_args.dataset_name is not None: + # Downloading and loading a dataset from the hub. + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) + else: + data_files = {} + if data_args.train_file is not None: + data_files["train"] = data_args.train_file + extension = data_args.train_file.split(".")[-1] + if data_args.validation_file is not None: + data_files["validation"] = data_args.validation_file + extension = data_args.validation_file.split(".")[-1] + if data_args.test_file is not None: + data_files["test"] = data_args.test_file + extension = data_args.test_file.split(".")[-1] + raw_datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) + # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) + # at https://huggingface.co/docs/datasets/loading_datasets.html. + + # Preprocessing the datasets. + column_names = raw_datasets["validation"].column_names + + question_column_name = "question" if "question" in column_names else column_names[0] + context_column_name = "context" if "context" in column_names else column_names[1] + answer_column_name = "answers" if "answers" in column_names else column_names[2] + + # Validation preprocessing + def prepare_validation_features(examples, tokenizer: PreTrainedTokenizer): + # Some of the questions have lots of whitespace on the left, which is not useful and will make the + # truncation of the context fail (the tokenized question will take a lots of space). So we remove that + # left whitespace + examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] + + # Padding side determines if we do (question|context) or (context|question). + pad_on_right = tokenizer.padding_side == "right" + + # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This + # results in one example possible giving several features when a context is long, each of those features + # having a context that overlaps a bit the context of the previous feature. 
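The comment above describes the windowed ("stride") tokenization that turns one long context into several overlapping features. A small standalone illustration, using one of the checkpoints supported by this example and dummy text:

```python
# Standalone illustration of return_overflowing_tokens + stride (dummy data).
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad")
enc = tok(
    ["Who wrote the report?"],                 # question
    ["A deliberately long context. " * 300],   # context far longer than max_length
    truncation="only_second",
    max_length=384,
    stride=128,
    return_overflowing_tokens=True,
    return_offsets_mapping=True,
    padding="max_length",
)
# One example yields several features; this map ties each feature back to it.
print(len(enc["input_ids"]), enc["overflow_to_sample_mapping"])
```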
+ tokenized_examples = tokenizer( + examples[question_column_name if pad_on_right else context_column_name], + examples[context_column_name if pad_on_right else question_column_name], + truncation="only_second" if pad_on_right else "only_first", + max_length=min(data_args.max_seq_length, tokenizer.model_max_length), + stride=data_args.doc_stride, + return_overflowing_tokens=True, + return_offsets_mapping=True, + padding="max_length" if data_args.pad_to_max_length else False, + ) + + # Since one example might give us several features if it has a long context, we need a map from a feature to + # its corresponding example. This key gives us just that. + sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + + # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the + # corresponding example_id and we will store the offset mappings. + tokenized_examples["example_id"] = [] + + for i in range(len(tokenized_examples["input_ids"])): + # Grab the sequence corresponding to that example (to know what is the context and what is the question) + sequence_ids = tokenized_examples.sequence_ids(i) + context_index = 1 if pad_on_right else 0 + + # One example can give several spans, this is the index of the example containing this span of text. + sample_index = sample_mapping[i] + tokenized_examples["example_id"].append(examples["id"][sample_index]) + + # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token + # position is part of the context or not. + tokenized_examples["offset_mapping"][i] = [ + (o if sequence_ids[k] == context_index else None) + for k, o in enumerate(tokenized_examples["offset_mapping"][i]) + ] + + return tokenized_examples + + # Preprocess the evaluation dataset + if "validation" not in raw_datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_examples = raw_datasets["validation"] + if data_args.max_eval_samples is not None: + eval_examples = eval_examples.select(range(data_args.max_eval_samples)) + eval_dataset = eval_examples.map( + partial(prepare_validation_features, tokenizer=tokenizer), + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on validation dataset", + ) + if data_args.max_eval_samples is not None: + # During Feature creation dataset samples might increase, we will select required samples again + eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) + + # Post-processing: + def post_processing_function(examples, features, predictions, stage="eval"): + # Post-processing: we match the start logits and end logits to answers in the original context. + predictions = postprocess_qa_predictions( + examples=examples, + features=features, + predictions=predictions, + version_2_with_negative=data_args.version_2_with_negative, + n_best_size=data_args.n_best_size, + max_answer_length=data_args.max_answer_length, + null_score_diff_threshold=data_args.null_score_diff_threshold, + output_dir=training_args.output_dir, + log_level=log_level, + prefix=stage, + ) + # Format the result to the format the metric expects. 
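The formatting done in the rest of this function only exists to match what the `squad` / `squad_v2` metric from the `evaluate` library expects. A minimal, self-contained illustration with a dummy id:

```python
# What the SQuAD metric expects (dummy id / answer text for illustration).
from evaluate import load

metric = load("squad")
predictions = [{"id": "dummy-0", "prediction_text": "Denver Broncos"}]
references = [{"id": "dummy-0",
               "answers": {"text": ["Denver Broncos"], "answer_start": [177]}}]
# For squad_v2, each prediction additionally carries a "no_answer_probability" field.
print(metric.compute(predictions=predictions, references=references))
# -> {'exact_match': 100.0, 'f1': 100.0}
```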
+ if data_args.version_2_with_negative: + formatted_predictions = [ + {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items() + ] + else: + formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()] + + references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples] + return EvalPrediction(predictions=formatted_predictions, label_ids=references) + + metric = load("squad_v2" if data_args.version_2_with_negative else "squad") + + def compute_metrics(p: EvalPrediction): + return metric.compute(predictions=p.predictions, references=p.label_ids) + + + def eval_func(model, *args): + # Evaluation + logger.info("*** Evaluate ***") + + ort_model = ORTModel( + model, + compute_metrics=compute_metrics, + label_names=["start_positions", "end_positions"], + ) + outputs = ort_model.evaluation_loop(eval_dataset) + predictions = post_processing_function(eval_examples, eval_dataset, outputs.predictions) + metrics = compute_metrics(predictions) + return metrics['f1'] + + if model_args.tune: + # optimize model + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('bert') + opt_options.enable_embed_layer_norm = False + + model_optimizer = optimizer.optimize_model( + model_args.input_model, + 'bert', + num_heads=model_args.num_heads, + hidden_size=model_args.hidden_size, + optimization_options=opt_options) + model = model_optimizer.model + + # check the optimized model is valid + try: + onnxruntime.InferenceSession(model.SerializeToString(), providers=onnxruntime.get_available_providers()) + except Exception as e: + logger.warning("Optimized model is invalid: {}. ".format(e)) + logger.warning("Model optimizer will be skipped. 
" \ + "Try to upgrade onnxruntime to avoid this error") + model = onnx.load(model_args.input_model) + + from neural_compressor import quantization, PostTrainingQuantConfig + from neural_compressor.utils.constant import FP32 + calib_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) + fp32_op_names = None + if model_args.model_name_or_path == 'mrm8488/spanbert-finetuned-squadv1': + fp32_op_names = ['/bert/embeddings/word_embeddings/Gather', + '/bert/encoder/layer.[5-7|9]/output/dense/MatMul'] + elif model_args.model_name_or_path == 'salti/bert-base-multilingual-cased-finetuned-squad': + fp32_op_names = ['/bert/encoder/layer.[4-5]/output/dense/MatMul'] + elif model_args.model_name_or_path == 'distilbert-base-uncased-distilled-squad': + fp32_op_names = ['/distilbert/transformer/layer.[1-5]/ffn/lin[1-2]/MatMul'] + elif model_args.model_name_or_path == 'deepset/roberta-large-squad2': + fp32_op_names = ['/roberta/encoder/layer.\d+/intermediate/dense/MatMul', + '/roberta/encoder/layer.\d+/output/dense/MatMul'] + config = PostTrainingQuantConfig(approach='static', + quant_format=model_args.quant_format, + op_name_dict={op_name:FP32 for op_name in fp32_op_names} \ + if fp32_op_names else None,) + q_model = quantization.fit(model, + config, + eval_func=eval_func, + calib_dataloader=DataLoader(framework='onnxruntime', + dataset=calib_dataset, + batch_size=model_args.batch_size) + ) + q_model.save(model_args.save_path) + + if model_args.benchmark: + model = onnx.load(model_args.input_model) + if model_args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + b_dataset = SQuADDataset(eval_dataset, model, label_names=["start_positions", "end_positions"]) + conf = BenchmarkConfig(iteration=100, + cores_per_instance=28, + num_of_instance=1) + b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) + fit(model, conf, b_dataloader=b_dataloader) + elif model_args.mode == 'accuracy': + eval_f1 = eval_func(model) + print("Batch size = %d" % model_args.batch_size) + print("Accuracy: %.5f" % eval_f1) + + +if __name__ == "__main__": + main() diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/prepare_model.py similarity index 98% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/prepare_model.py index ef82998244d..5e83e780c46 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/prepare_model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/prepare_model.py @@ -1,78 +1,78 @@ -import argparse -import os - -import torch -from transformers import AutoConfig, AutoModelForQuestionAnswering - -def export_onnx_model(args, model): - with torch.no_grad(): - symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} - if args.input_model in ['distilbert-base-uncased-distilled-squad', - 'deepset/roberta-large-squad2']: - inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple 
inputs) - inputs['attention_mask']), - args.output_model, # where to save the model (can be a file or file-like object) - opset_version=14, # the ONNX version to export the model - do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask'], - output_names=['start_logits', - 'end_logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names}) - else: - inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), - 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64), - 'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)} - torch.onnx.export(model, # model being run - (inputs['input_ids'], # model input (or a tuple for multiple inputs) - inputs['attention_mask'], - inputs['token_type_ids']), - args.output_model, # where to save the model (can be a file or file-like object) - opset_version=14, # the ONNX version to export the model - do_constant_folding=True, # whether to execute constant folding - input_names=['input_ids', # the model's input names - 'attention_mask', - 'token_type_ids'], - output_names=['start_logits', - 'end_logits'], - dynamic_axes={'input_ids': symbolic_names, # variable length axes - 'attention_mask' : symbolic_names, - 'token_type_ids' : symbolic_names}) - assert os.path.exists(args.output_model), f"{args.output_model} doesn't exist!" - print("ONNX Model exported to {0}".format(args.output_model)) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export huggingface onnx model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument('--input_model', - type=str, - default='mrm8488/spanbert-finetuned-squadv1', - const='mrm8488/spanbert-finetuned-squadv1', - nargs='?', - choices=[ - 'mrm8488/spanbert-finetuned-squadv1', - 'salti/bert-base-multilingual-cased-finetuned-squad', - 'distilbert-base-uncased-distilled-squad', - 'bert-large-uncased-whole-word-masking-finetuned-squad', - 'deepset/roberta-large-squad2' - ], - help='pretrained model name or path ') - parser.add_argument("--output_model", type=str, required=True) - parser.add_argument( - '--max_len', - type=int, - default=512, - help='Maximum length of the sentence pairs') - args = parser.parse_args() - - model = AutoModelForQuestionAnswering.from_pretrained( - args.input_model, - config=AutoConfig.from_pretrained(args.input_model)) - +import argparse +import os + +import torch +from transformers import AutoConfig, AutoModelForQuestionAnswering + +def export_onnx_model(args, model): + with torch.no_grad(): + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + if args.input_model in ['distilbert-base-uncased-distilled-squad', + 'deepset/roberta-large-squad2']: + inputs = {'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64)} + torch.onnx.export(model, # model being run + (inputs['input_ids'], # model input (or a tuple for multiple inputs) + inputs['attention_mask']), + args.output_model, # where to save the model (can be a file or file-like object) + opset_version=14, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'attention_mask'], + output_names=['start_logits', + 'end_logits'], + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'attention_mask' : symbolic_names}) + else: + inputs = 
{'input_ids': torch.ones(1, args.max_len, dtype=torch.int64), + 'attention_mask': torch.ones(1, args.max_len, dtype=torch.int64), + 'token_type_ids': torch.ones(1, args.max_len, dtype=torch.int64)} + torch.onnx.export(model, # model being run + (inputs['input_ids'], # model input (or a tuple for multiple inputs) + inputs['attention_mask'], + inputs['token_type_ids']), + args.output_model, # where to save the model (can be a file or file-like object) + opset_version=14, # the ONNX version to export the model + do_constant_folding=True, # whether to execute constant folding + input_names=['input_ids', # the model's input names + 'attention_mask', + 'token_type_ids'], + output_names=['start_logits', + 'end_logits'], + dynamic_axes={'input_ids': symbolic_names, # variable length axes + 'attention_mask' : symbolic_names, + 'token_type_ids' : symbolic_names}) + assert os.path.exists(args.output_model), f"{args.output_model} doesn't exist!" + print("ONNX Model exported to {0}".format(args.output_model)) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export huggingface onnx model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument('--input_model', + type=str, + default='mrm8488/spanbert-finetuned-squadv1', + const='mrm8488/spanbert-finetuned-squadv1', + nargs='?', + choices=[ + 'mrm8488/spanbert-finetuned-squadv1', + 'salti/bert-base-multilingual-cased-finetuned-squad', + 'distilbert-base-uncased-distilled-squad', + 'bert-large-uncased-whole-word-masking-finetuned-squad', + 'deepset/roberta-large-squad2' + ], + help='pretrained model name or path ') + parser.add_argument("--output_model", type=str, required=True) + parser.add_argument( + '--max_len', + type=int, + default=512, + help='Maximum length of the sentence pairs') + args = parser.parse_args() + + model = AutoModelForQuestionAnswering.from_pretrained( + args.input_model, + config=AutoConfig.from_pretrained(args.input_model)) + export_onnx_model(args, model) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt similarity index 94% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt index 0607f560e8c..f474f54ce83 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/requirements.txt @@ -1,11 +1,11 @@ -onnx -onnxruntime -onnxruntime-extensions; python_version < '3.11' -transformers -accelerate -tensorboard -numpy==1.23.5 -datasets >= 1.8.0 -torch >= 1.9.0 -evaluate +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.11' +transformers +accelerate +tensorboard +numpy==1.23.5 +datasets >= 1.8.0 +torch >= 1.9.0 +evaluate tqdm \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_benchmark.sh rename to 
examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/trainer_qa.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/trainer_qa.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/trainer_qa.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/trainer_qa.py index 190a20d317d..00b4545294d 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/trainer_qa.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/trainer_qa.py @@ -1,96 +1,96 @@ -# coding=utf-8 -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -A subclass of `IncTrainer` specific to Question-Answering tasks -""" - -from transformers import Trainer -from transformers.trainer_utils import PredictionOutput - - -class QuestionAnsweringTrainer(Trainer): - def __init__(self, *args, eval_examples=None, post_process_function=None, **kwargs): - super().__init__(*args, **kwargs) - self.eval_examples = eval_examples - self.post_process_function = post_process_function - - def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=None, metric_key_prefix: str = "eval"): - eval_dataset = self.eval_dataset if eval_dataset is None else eval_dataset - eval_dataloader = self.get_eval_dataloader(eval_dataset) - eval_examples = self.eval_examples if eval_examples is None else eval_examples - - # Temporarily disable metric computation, we will do it in the loop here. 
- compute_metrics = self.compute_metrics - self.compute_metrics = None - eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop - try: - output = eval_loop( - eval_dataloader, - description="Evaluation", - # No point gathering the predictions if there are no metrics, otherwise we defer to - # self.args.prediction_loss_only - prediction_loss_only=True if compute_metrics is None else None, - ignore_keys=ignore_keys, - ) - finally: - self.compute_metrics = compute_metrics - - if self.post_process_function is not None and self.compute_metrics is not None: - eval_preds = self.post_process_function(eval_examples, eval_dataset, output.predictions) - metrics = self.compute_metrics(eval_preds) - - # Prefix all keys with metric_key_prefix + '_' - for key in list(metrics.keys()): - if not key.startswith(f"{metric_key_prefix}_"): - metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) - - self.log(metrics) - else: - metrics = {} - - self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, metrics) - return metrics - - def predict(self, predict_dataset, predict_examples, ignore_keys=None, metric_key_prefix: str = "test"): - predict_dataloader = self.get_test_dataloader(predict_dataset) - - # Temporarily disable metric computation, we will do it in the loop here. - compute_metrics = self.compute_metrics - self.compute_metrics = None - eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop - try: - output = eval_loop( - predict_dataloader, - description="Prediction", - # No point gathering the predictions if there are no metrics, otherwise we defer to - # self.args.prediction_loss_only - prediction_loss_only=True if compute_metrics is None else None, - ignore_keys=ignore_keys, - ) - finally: - self.compute_metrics = compute_metrics - - if self.post_process_function is None or self.compute_metrics is None: - return output - - predictions = self.post_process_function(predict_examples, predict_dataset, output.predictions, "predict") - metrics = self.compute_metrics(predictions) - - # Prefix all keys with metric_key_prefix + '_' - for key in list(metrics.keys()): - if not key.startswith(f"{metric_key_prefix}_"): - metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) - +# coding=utf-8 +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
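The trainer subclass re-added below is wired up like a regular `transformers.Trainer`, plus the two extra hooks it defines. A rough usage sketch, assuming the model, arguments, datasets, tokenizer, and the two functions are built elsewhere (as in a run_qa-style script):

```python
# Rough usage sketch for the QuestionAnsweringTrainer defined below.
# All objects passed in are assumed to be constructed elsewhere.
trainer = QuestionAnsweringTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    eval_examples=eval_examples,                      # extra hook: raw, unprocessed examples
    tokenizer=tokenizer,
    post_process_function=post_processing_function,   # extra hook: logits -> text answers
    compute_metrics=compute_metrics,
)
metrics = trainer.evaluate()   # e.g. {"eval_exact_match": ..., "eval_f1": ...}
```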
+""" +A subclass of `IncTrainer` specific to Question-Answering tasks +""" + +from transformers import Trainer +from transformers.trainer_utils import PredictionOutput + + +class QuestionAnsweringTrainer(Trainer): + def __init__(self, *args, eval_examples=None, post_process_function=None, **kwargs): + super().__init__(*args, **kwargs) + self.eval_examples = eval_examples + self.post_process_function = post_process_function + + def evaluate(self, eval_dataset=None, eval_examples=None, ignore_keys=None, metric_key_prefix: str = "eval"): + eval_dataset = self.eval_dataset if eval_dataset is None else eval_dataset + eval_dataloader = self.get_eval_dataloader(eval_dataset) + eval_examples = self.eval_examples if eval_examples is None else eval_examples + + # Temporarily disable metric computation, we will do it in the loop here. + compute_metrics = self.compute_metrics + self.compute_metrics = None + eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop + try: + output = eval_loop( + eval_dataloader, + description="Evaluation", + # No point gathering the predictions if there are no metrics, otherwise we defer to + # self.args.prediction_loss_only + prediction_loss_only=True if compute_metrics is None else None, + ignore_keys=ignore_keys, + ) + finally: + self.compute_metrics = compute_metrics + + if self.post_process_function is not None and self.compute_metrics is not None: + eval_preds = self.post_process_function(eval_examples, eval_dataset, output.predictions) + metrics = self.compute_metrics(eval_preds) + + # Prefix all keys with metric_key_prefix + '_' + for key in list(metrics.keys()): + if not key.startswith(f"{metric_key_prefix}_"): + metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) + + self.log(metrics) + else: + metrics = {} + + self.control = self.callback_handler.on_evaluate(self.args, self.state, self.control, metrics) + return metrics + + def predict(self, predict_dataset, predict_examples, ignore_keys=None, metric_key_prefix: str = "test"): + predict_dataloader = self.get_test_dataloader(predict_dataset) + + # Temporarily disable metric computation, we will do it in the loop here. 
+ compute_metrics = self.compute_metrics + self.compute_metrics = None + eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop + try: + output = eval_loop( + predict_dataloader, + description="Prediction", + # No point gathering the predictions if there are no metrics, otherwise we defer to + # self.args.prediction_loss_only + prediction_loss_only=True if compute_metrics is None else None, + ignore_keys=ignore_keys, + ) + finally: + self.compute_metrics = compute_metrics + + if self.post_process_function is None or self.compute_metrics is None: + return output + + predictions = self.post_process_function(predict_examples, predict_dataset, output.predictions, "predict") + metrics = self.compute_metrics(predictions) + + # Prefix all keys with metric_key_prefix + '_' + for key in list(metrics.keys()): + if not key.startswith(f"{metric_key_prefix}_"): + metrics[f"{metric_key_prefix}_{key}"] = metrics.pop(key) + return PredictionOutput(predictions=predictions.predictions, label_ids=predictions.label_ids, metrics=metrics) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_qa.py b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_qa.py similarity index 98% rename from examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_qa.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_qa.py index 6006e1f90bb..fb25f04ac1b 100644 --- a/examples/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_dynamic/utils_qa.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/question_answering/quantization/ptq_static/utils_qa.py @@ -1,428 +1,428 @@ -# coding=utf-8 -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Post-processing utilities for question answering. 
-""" -import collections -import json -import logging -import os -from typing import Optional, Tuple - -import numpy as np -from tqdm.auto import tqdm - - -logger = logging.getLogger(__name__) - - -def postprocess_qa_predictions( - examples, - features, - predictions: Tuple[np.ndarray, np.ndarray], - version_2_with_negative: bool = False, - n_best_size: int = 20, - max_answer_length: int = 30, - null_score_diff_threshold: float = 0.0, - output_dir: Optional[str] = None, - prefix: Optional[str] = None, - log_level: Optional[int] = logging.WARNING, -): - """ - Post-processes the predictions of a question-answering model to convert them to answers that are substrings of the - original contexts. This is the base postprocessing functions for models that only return start and end logits. - Args: - examples: The non-preprocessed dataset (see the main script for more information). - features: The processed dataset (see the main script for more information). - predictions (:obj:`Tuple[np.ndarray, np.ndarray]`): - The predictions of the model: two arrays containing the start logits and the end logits respectively. Its - first dimension must match the number of elements of :obj:`features`. - version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`): - Whether or not the underlying dataset contains examples with no answers. - n_best_size (:obj:`int`, `optional`, defaults to 20): - The total number of n-best predictions to generate when looking for an answer. - max_answer_length (:obj:`int`, `optional`, defaults to 30): - The maximum length of an answer that can be generated. This is needed because the start and end predictions - are not conditioned on one another. - null_score_diff_threshold (:obj:`float`, `optional`, defaults to 0): - The threshold used to select the null answer: if the best answer has a score that is less than the score of - the null answer minus this threshold, the null answer is selected for this example (note that the score of - the null answer for an example giving several features is the minimum of the scores for the null answer on - each feature: all features must be aligned on the fact they `want` to predict a null answer). - Only useful when :obj:`version_2_with_negative` is :obj:`True`. - output_dir (:obj:`str`, `optional`): - If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if - :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null - answers, are saved in `output_dir`. - prefix (:obj:`str`, `optional`): - If provided, the dictionaries mentioned above are saved with `prefix` added to their names. - log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): - ``logging`` log level (e.g., ``logging.WARNING``) - """ - if len(predictions) != 2: - raise ValueError("`predictions` should be a tuple with two elements (start_logits, end_logits).") - all_start_logits, all_end_logits = predictions - - if len(predictions[0]) != len(features): - raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") - - # Build a map example to its corresponding features. - example_id_to_index = {k: i for i, k in enumerate(examples["id"])} - features_per_example = collections.defaultdict(list) - for i, feature in enumerate(features): - features_per_example[example_id_to_index[feature["example_id"]]].append(i) - - # The dictionaries we have to fill. 
- all_predictions = collections.OrderedDict() - all_nbest_json = collections.OrderedDict() - if version_2_with_negative: - scores_diff_json = collections.OrderedDict() - - # Logging. - logger.setLevel(log_level) - logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") - - # Let's loop over all the examples! - for example_index, example in enumerate(tqdm(examples)): - # Those are the indices of the features associated to the current example. - feature_indices = features_per_example[example_index] - - min_null_prediction = None - prelim_predictions = [] - - # Looping through all the features associated to the current example. - for feature_index in feature_indices: - # We grab the predictions of the model for this feature. - start_logits = all_start_logits[feature_index] - end_logits = all_end_logits[feature_index] - # This is what will allow us to map some the positions in our logits to span of texts in the original - # context. - offset_mapping = features[feature_index]["offset_mapping"] - # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context - # available in the current feature. - token_is_max_context = features[feature_index].get("token_is_max_context", None) - - # Update minimum null prediction. - feature_null_score = start_logits[0] + end_logits[0] - if min_null_prediction is None or min_null_prediction["score"] > feature_null_score: - min_null_prediction = { - "offsets": (0, 0), - "score": feature_null_score, - "start_logit": start_logits[0], - "end_logit": end_logits[0], - } - - # Go through all possibilities for the `n_best_size` greater start and end logits. - start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist() - end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist() - for start_index in start_indexes: - for end_index in end_indexes: - # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond - # to part of the input_ids that are not in the context. - if ( - start_index >= len(offset_mapping) - or end_index >= len(offset_mapping) - or offset_mapping[start_index] is None - or offset_mapping[end_index] is None - ): - continue - # Don't consider answers with a length that is either < 0 or > max_answer_length. - if end_index < start_index or end_index - start_index + 1 > max_answer_length: - continue - # Don't consider answer that don't have the maximum context available (if such information is - # provided). - if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): - continue - prelim_predictions.append( - { - "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), - "score": start_logits[start_index] + end_logits[end_index], - "start_logit": start_logits[start_index], - "end_logit": end_logits[end_index], - } - ) - if version_2_with_negative: - # Add the minimum null prediction - prelim_predictions.append(min_null_prediction) - null_score = min_null_prediction["score"] - - # Only keep the best `n_best_size` predictions. - predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] - - # Add back the minimum null prediction if it was removed because of its low score. - if version_2_with_negative and not any(p["offsets"] == (0, 0) for p in predictions): - predictions.append(min_null_prediction) - - # Use the offsets to gather the answer text in the original context. 
- context = example["context"] - for pred in predictions: - offsets = pred.pop("offsets") - pred["text"] = context[offsets[0] : offsets[1]] - - # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid - # failure. - if len(predictions) == 0 or (len(predictions) == 1 and predictions[0]["text"] == ""): - predictions.insert(0, {"text": "empty", "start_logit": 0.0, "end_logit": 0.0, "score": 0.0}) - - # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using - # the LogSumExp trick). - scores = np.array([pred.pop("score") for pred in predictions]) - exp_scores = np.exp(scores - np.max(scores)) - probs = exp_scores / exp_scores.sum() - - # Include the probabilities in our predictions. - for prob, pred in zip(probs, predictions): - pred["probability"] = prob - - # Pick the best prediction. If the null answer is not possible, this is easy. - if not version_2_with_negative: - all_predictions[example["id"]] = predictions[0]["text"] - else: - # Otherwise we first need to find the best non-empty prediction. - i = 0 - while predictions[i]["text"] == "": - i += 1 - best_non_null_pred = predictions[i] - - # Then we compare to the null prediction using the threshold. - score_diff = null_score - best_non_null_pred["start_logit"] - best_non_null_pred["end_logit"] - scores_diff_json[example["id"]] = float(score_diff) # To be JSON-serializable. - if score_diff > null_score_diff_threshold: - all_predictions[example["id"]] = "" - else: - all_predictions[example["id"]] = best_non_null_pred["text"] - - # Make `predictions` JSON-serializable by casting np.float back to float. - all_nbest_json[example["id"]] = [ - {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} - for pred in predictions - ] - - # If we have an output_dir, let's save all those dicts. - if output_dir is not None: - if not os.path.isdir(output_dir): - raise EnvironmentError(f"{output_dir} is not a directory.") - - prediction_file = os.path.join( - output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" - ) - nbest_file = os.path.join( - output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json" - ) - if version_2_with_negative: - null_odds_file = os.path.join( - output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json" - ) - - logger.info(f"Saving predictions to {prediction_file}.") - with open(prediction_file, "w") as writer: - writer.write(json.dumps(all_predictions, indent=4) + "\n") - logger.info(f"Saving nbest_preds to {nbest_file}.") - with open(nbest_file, "w") as writer: - writer.write(json.dumps(all_nbest_json, indent=4) + "\n") - if version_2_with_negative: - logger.info(f"Saving null_odds to {null_odds_file}.") - with open(null_odds_file, "w") as writer: - writer.write(json.dumps(scores_diff_json, indent=4) + "\n") - - return all_predictions - - -def postprocess_qa_predictions_with_beam_search( - examples, - features, - predictions: Tuple[np.ndarray, np.ndarray], - version_2_with_negative: bool = False, - n_best_size: int = 20, - max_answer_length: int = 30, - start_n_top: int = 5, - end_n_top: int = 5, - output_dir: Optional[str] = None, - prefix: Optional[str] = None, - log_level: Optional[int] = logging.WARNING, -): - """ - Post-processes the predictions of a question-answering model with beam search to convert them to answers that are substrings of the - original contexts. 
This is the postprocessing functions for models that return start and end logits, indices, as well as - cls token predictions. - Args: - examples: The non-preprocessed dataset (see the main script for more information). - features: The processed dataset (see the main script for more information). - predictions (:obj:`Tuple[np.ndarray, np.ndarray]`): - The predictions of the model: two arrays containing the start logits and the end logits respectively. Its - first dimension must match the number of elements of :obj:`features`. - version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`): - Whether or not the underlying dataset contains examples with no answers. - n_best_size (:obj:`int`, `optional`, defaults to 20): - The total number of n-best predictions to generate when looking for an answer. - max_answer_length (:obj:`int`, `optional`, defaults to 30): - The maximum length of an answer that can be generated. This is needed because the start and end predictions - are not conditioned on one another. - start_n_top (:obj:`int`, `optional`, defaults to 5): - The number of top start logits too keep when searching for the :obj:`n_best_size` predictions. - end_n_top (:obj:`int`, `optional`, defaults to 5): - The number of top end logits too keep when searching for the :obj:`n_best_size` predictions. - output_dir (:obj:`str`, `optional`): - If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if - :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null - answers, are saved in `output_dir`. - prefix (:obj:`str`, `optional`): - If provided, the dictionaries mentioned above are saved with `prefix` added to their names. - log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): - ``logging`` log level (e.g., ``logging.WARNING``) - """ - if len(predictions) != 5: - raise ValueError("`predictions` should be a tuple with five elements.") - start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = predictions - - if len(predictions[0]) != len(features): - raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") - - # Build a map example to its corresponding features. - example_id_to_index = {k: i for i, k in enumerate(examples["id"])} - features_per_example = collections.defaultdict(list) - for i, feature in enumerate(features): - features_per_example[example_id_to_index[feature["example_id"]]].append(i) - - # The dictionaries we have to fill. - all_predictions = collections.OrderedDict() - all_nbest_json = collections.OrderedDict() - scores_diff_json = collections.OrderedDict() if version_2_with_negative else None - - # Logging. - logger.setLevel(log_level) - logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") - - # Let's loop over all the examples! - for example_index, example in enumerate(tqdm(examples)): - # Those are the indices of the features associated to the current example. - feature_indices = features_per_example[example_index] - - min_null_score = None - prelim_predictions = [] - - # Looping through all the features associated to the current example. - for feature_index in feature_indices: - # We grab the predictions of the model for this feature. 
- start_log_prob = start_top_log_probs[feature_index] - start_indexes = start_top_index[feature_index] - end_log_prob = end_top_log_probs[feature_index] - end_indexes = end_top_index[feature_index] - feature_null_score = cls_logits[feature_index] - # This is what will allow us to map some the positions in our logits to span of texts in the original - # context. - offset_mapping = features[feature_index]["offset_mapping"] - # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context - # available in the current feature. - token_is_max_context = features[feature_index].get("token_is_max_context", None) - - # Update minimum null prediction - if min_null_score is None or feature_null_score < min_null_score: - min_null_score = feature_null_score - - # Go through all possibilities for the `n_start_top`/`n_end_top` greater start and end logits. - for i in range(start_n_top): - for j in range(end_n_top): - start_index = int(start_indexes[i]) - j_index = i * end_n_top + j - end_index = int(end_indexes[j_index]) - # Don't consider out-of-scope answers (last part of the test should be unnecessary because of the - # p_mask but let's not take any risk) - if ( - start_index >= len(offset_mapping) - or end_index >= len(offset_mapping) - or offset_mapping[start_index] is None - or offset_mapping[end_index] is None - ): - continue - # Don't consider answers with a length negative or > max_answer_length. - if end_index < start_index or end_index - start_index + 1 > max_answer_length: - continue - # Don't consider answer that don't have the maximum context available (if such information is - # provided). - if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): - continue - prelim_predictions.append( - { - "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), - "score": start_log_prob[i] + end_log_prob[j_index], - "start_log_prob": start_log_prob[i], - "end_log_prob": end_log_prob[j_index], - } - ) - - # Only keep the best `n_best_size` predictions. - predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] - - # Use the offsets to gather the answer text in the original context. - context = example["context"] - for pred in predictions: - offsets = pred.pop("offsets") - pred["text"] = context[offsets[0] : offsets[1]] - - # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid - # failure. - if len(predictions) == 0: - predictions.insert(0, {"text": "", "start_logit": -1e-6, "end_logit": -1e-6, "score": -2e-6}) - - # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using - # the LogSumExp trick). - scores = np.array([pred.pop("score") for pred in predictions]) - exp_scores = np.exp(scores - np.max(scores)) - probs = exp_scores / exp_scores.sum() - - # Include the probabilities in our predictions. - for prob, pred in zip(probs, predictions): - pred["probability"] = prob - - # Pick the best prediction and set the probability for the null answer. - all_predictions[example["id"]] = predictions[0]["text"] - if version_2_with_negative: - scores_diff_json[example["id"]] = float(min_null_score) - - # Make `predictions` JSON-serializable by casting np.float back to float. 
- all_nbest_json[example["id"]] = [ - {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} - for pred in predictions - ] - - # If we have an output_dir, let's save all those dicts. - if output_dir is not None: - if not os.path.isdir(output_dir): - raise EnvironmentError(f"{output_dir} is not a directory.") - - prediction_file = os.path.join( - output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" - ) - nbest_file = os.path.join( - output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json" - ) - if version_2_with_negative: - null_odds_file = os.path.join( - output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json" - ) - - logger.info(f"Saving predictions to {prediction_file}.") - with open(prediction_file, "w") as writer: - writer.write(json.dumps(all_predictions, indent=4) + "\n") - logger.info(f"Saving nbest_preds to {nbest_file}.") - with open(nbest_file, "w") as writer: - writer.write(json.dumps(all_nbest_json, indent=4) + "\n") - if version_2_with_negative: - logger.info(f"Saving null_odds to {null_odds_file}.") - with open(null_odds_file, "w") as writer: - writer.write(json.dumps(scores_diff_json, indent=4) + "\n") - +# coding=utf-8 +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Post-processing utilities for question answering. +""" +import collections +import json +import logging +import os +from typing import Optional, Tuple + +import numpy as np +from tqdm.auto import tqdm + + +logger = logging.getLogger(__name__) + + +def postprocess_qa_predictions( + examples, + features, + predictions: Tuple[np.ndarray, np.ndarray], + version_2_with_negative: bool = False, + n_best_size: int = 20, + max_answer_length: int = 30, + null_score_diff_threshold: float = 0.0, + output_dir: Optional[str] = None, + prefix: Optional[str] = None, + log_level: Optional[int] = logging.WARNING, +): + """ + Post-processes the predictions of a question-answering model to convert them to answers that are substrings of the + original contexts. This is the base postprocessing functions for models that only return start and end logits. + Args: + examples: The non-preprocessed dataset (see the main script for more information). + features: The processed dataset (see the main script for more information). + predictions (:obj:`Tuple[np.ndarray, np.ndarray]`): + The predictions of the model: two arrays containing the start logits and the end logits respectively. Its + first dimension must match the number of elements of :obj:`features`. + version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not the underlying dataset contains examples with no answers. + n_best_size (:obj:`int`, `optional`, defaults to 20): + The total number of n-best predictions to generate when looking for an answer. 
+ max_answer_length (:obj:`int`, `optional`, defaults to 30): + The maximum length of an answer that can be generated. This is needed because the start and end predictions + are not conditioned on one another. + null_score_diff_threshold (:obj:`float`, `optional`, defaults to 0): + The threshold used to select the null answer: if the best answer has a score that is less than the score of + the null answer minus this threshold, the null answer is selected for this example (note that the score of + the null answer for an example giving several features is the minimum of the scores for the null answer on + each feature: all features must be aligned on the fact they `want` to predict a null answer). + Only useful when :obj:`version_2_with_negative` is :obj:`True`. + output_dir (:obj:`str`, `optional`): + If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if + :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null + answers, are saved in `output_dir`. + prefix (:obj:`str`, `optional`): + If provided, the dictionaries mentioned above are saved with `prefix` added to their names. + log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): + ``logging`` log level (e.g., ``logging.WARNING``) + """ + if len(predictions) != 2: + raise ValueError("`predictions` should be a tuple with two elements (start_logits, end_logits).") + all_start_logits, all_end_logits = predictions + + if len(predictions[0]) != len(features): + raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") + + # Build a map example to its corresponding features. + example_id_to_index = {k: i for i, k in enumerate(examples["id"])} + features_per_example = collections.defaultdict(list) + for i, feature in enumerate(features): + features_per_example[example_id_to_index[feature["example_id"]]].append(i) + + # The dictionaries we have to fill. + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + if version_2_with_negative: + scores_diff_json = collections.OrderedDict() + + # Logging. + logger.setLevel(log_level) + logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") + + # Let's loop over all the examples! + for example_index, example in enumerate(tqdm(examples)): + # Those are the indices of the features associated to the current example. + feature_indices = features_per_example[example_index] + + min_null_prediction = None + prelim_predictions = [] + + # Looping through all the features associated to the current example. + for feature_index in feature_indices: + # We grab the predictions of the model for this feature. + start_logits = all_start_logits[feature_index] + end_logits = all_end_logits[feature_index] + # This is what will allow us to map some the positions in our logits to span of texts in the original + # context. + offset_mapping = features[feature_index]["offset_mapping"] + # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context + # available in the current feature. + token_is_max_context = features[feature_index].get("token_is_max_context", None) + + # Update minimum null prediction. 
+ feature_null_score = start_logits[0] + end_logits[0] + if min_null_prediction is None or min_null_prediction["score"] > feature_null_score: + min_null_prediction = { + "offsets": (0, 0), + "score": feature_null_score, + "start_logit": start_logits[0], + "end_logit": end_logits[0], + } + + # Go through all possibilities for the `n_best_size` greater start and end logits. + start_indexes = np.argsort(start_logits)[-1 : -n_best_size - 1 : -1].tolist() + end_indexes = np.argsort(end_logits)[-1 : -n_best_size - 1 : -1].tolist() + for start_index in start_indexes: + for end_index in end_indexes: + # Don't consider out-of-scope answers, either because the indices are out of bounds or correspond + # to part of the input_ids that are not in the context. + if ( + start_index >= len(offset_mapping) + or end_index >= len(offset_mapping) + or offset_mapping[start_index] is None + or offset_mapping[end_index] is None + ): + continue + # Don't consider answers with a length that is either < 0 or > max_answer_length. + if end_index < start_index or end_index - start_index + 1 > max_answer_length: + continue + # Don't consider answer that don't have the maximum context available (if such information is + # provided). + if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): + continue + prelim_predictions.append( + { + "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), + "score": start_logits[start_index] + end_logits[end_index], + "start_logit": start_logits[start_index], + "end_logit": end_logits[end_index], + } + ) + if version_2_with_negative: + # Add the minimum null prediction + prelim_predictions.append(min_null_prediction) + null_score = min_null_prediction["score"] + + # Only keep the best `n_best_size` predictions. + predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] + + # Add back the minimum null prediction if it was removed because of its low score. + if version_2_with_negative and not any(p["offsets"] == (0, 0) for p in predictions): + predictions.append(min_null_prediction) + + # Use the offsets to gather the answer text in the original context. + context = example["context"] + for pred in predictions: + offsets = pred.pop("offsets") + pred["text"] = context[offsets[0] : offsets[1]] + + # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid + # failure. + if len(predictions) == 0 or (len(predictions) == 1 and predictions[0]["text"] == ""): + predictions.insert(0, {"text": "empty", "start_logit": 0.0, "end_logit": 0.0, "score": 0.0}) + + # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using + # the LogSumExp trick). + scores = np.array([pred.pop("score") for pred in predictions]) + exp_scores = np.exp(scores - np.max(scores)) + probs = exp_scores / exp_scores.sum() + + # Include the probabilities in our predictions. + for prob, pred in zip(probs, predictions): + pred["probability"] = prob + + # Pick the best prediction. If the null answer is not possible, this is easy. + if not version_2_with_negative: + all_predictions[example["id"]] = predictions[0]["text"] + else: + # Otherwise we first need to find the best non-empty prediction. + i = 0 + while predictions[i]["text"] == "": + i += 1 + best_non_null_pred = predictions[i] + + # Then we compare to the null prediction using the threshold. 
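+            # The larger score_diff is, the more the null answer dominates the best non-null span:
+            # when it exceeds null_score_diff_threshold we output the empty string for this example,
+            # otherwise we keep the best non-null prediction found above.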
+ score_diff = null_score - best_non_null_pred["start_logit"] - best_non_null_pred["end_logit"] + scores_diff_json[example["id"]] = float(score_diff) # To be JSON-serializable. + if score_diff > null_score_diff_threshold: + all_predictions[example["id"]] = "" + else: + all_predictions[example["id"]] = best_non_null_pred["text"] + + # Make `predictions` JSON-serializable by casting np.float back to float. + all_nbest_json[example["id"]] = [ + {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} + for pred in predictions + ] + + # If we have an output_dir, let's save all those dicts. + if output_dir is not None: + if not os.path.isdir(output_dir): + raise EnvironmentError(f"{output_dir} is not a directory.") + + prediction_file = os.path.join( + output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" + ) + nbest_file = os.path.join( + output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json" + ) + if version_2_with_negative: + null_odds_file = os.path.join( + output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json" + ) + + logger.info(f"Saving predictions to {prediction_file}.") + with open(prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + logger.info(f"Saving nbest_preds to {nbest_file}.") + with open(nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + if version_2_with_negative: + logger.info(f"Saving null_odds to {null_odds_file}.") + with open(null_odds_file, "w") as writer: + writer.write(json.dumps(scores_diff_json, indent=4) + "\n") + + return all_predictions + + +def postprocess_qa_predictions_with_beam_search( + examples, + features, + predictions: Tuple[np.ndarray, np.ndarray], + version_2_with_negative: bool = False, + n_best_size: int = 20, + max_answer_length: int = 30, + start_n_top: int = 5, + end_n_top: int = 5, + output_dir: Optional[str] = None, + prefix: Optional[str] = None, + log_level: Optional[int] = logging.WARNING, +): + """ + Post-processes the predictions of a question-answering model with beam search to convert them to answers that are substrings of the + original contexts. This is the postprocessing functions for models that return start and end logits, indices, as well as + cls token predictions. + Args: + examples: The non-preprocessed dataset (see the main script for more information). + features: The processed dataset (see the main script for more information). + predictions (:obj:`Tuple[np.ndarray, np.ndarray]`): + The predictions of the model: two arrays containing the start logits and the end logits respectively. Its + first dimension must match the number of elements of :obj:`features`. + version_2_with_negative (:obj:`bool`, `optional`, defaults to :obj:`False`): + Whether or not the underlying dataset contains examples with no answers. + n_best_size (:obj:`int`, `optional`, defaults to 20): + The total number of n-best predictions to generate when looking for an answer. + max_answer_length (:obj:`int`, `optional`, defaults to 30): + The maximum length of an answer that can be generated. This is needed because the start and end predictions + are not conditioned on one another. + start_n_top (:obj:`int`, `optional`, defaults to 5): + The number of top start logits too keep when searching for the :obj:`n_best_size` predictions. 
+ end_n_top (:obj:`int`, `optional`, defaults to 5): + The number of top end logits too keep when searching for the :obj:`n_best_size` predictions. + output_dir (:obj:`str`, `optional`): + If provided, the dictionaries of predictions, n_best predictions (with their scores and logits) and, if + :obj:`version_2_with_negative=True`, the dictionary of the scores differences between best and null + answers, are saved in `output_dir`. + prefix (:obj:`str`, `optional`): + If provided, the dictionaries mentioned above are saved with `prefix` added to their names. + log_level (:obj:`int`, `optional`, defaults to ``logging.WARNING``): + ``logging`` log level (e.g., ``logging.WARNING``) + """ + if len(predictions) != 5: + raise ValueError("`predictions` should be a tuple with five elements.") + start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = predictions + + if len(predictions[0]) != len(features): + raise ValueError(f"Got {len(predictions[0])} predictions and {len(features)} features.") + + # Build a map example to its corresponding features. + example_id_to_index = {k: i for i, k in enumerate(examples["id"])} + features_per_example = collections.defaultdict(list) + for i, feature in enumerate(features): + features_per_example[example_id_to_index[feature["example_id"]]].append(i) + + # The dictionaries we have to fill. + all_predictions = collections.OrderedDict() + all_nbest_json = collections.OrderedDict() + scores_diff_json = collections.OrderedDict() if version_2_with_negative else None + + # Logging. + logger.setLevel(log_level) + logger.info(f"Post-processing {len(examples)} example predictions split into {len(features)} features.") + + # Let's loop over all the examples! + for example_index, example in enumerate(tqdm(examples)): + # Those are the indices of the features associated to the current example. + feature_indices = features_per_example[example_index] + + min_null_score = None + prelim_predictions = [] + + # Looping through all the features associated to the current example. + for feature_index in feature_indices: + # We grab the predictions of the model for this feature. + start_log_prob = start_top_log_probs[feature_index] + start_indexes = start_top_index[feature_index] + end_log_prob = end_top_log_probs[feature_index] + end_indexes = end_top_index[feature_index] + feature_null_score = cls_logits[feature_index] + # This is what will allow us to map some the positions in our logits to span of texts in the original + # context. + offset_mapping = features[feature_index]["offset_mapping"] + # Optional `token_is_max_context`, if provided we will remove answers that do not have the maximum context + # available in the current feature. + token_is_max_context = features[feature_index].get("token_is_max_context", None) + + # Update minimum null prediction + if min_null_score is None or feature_null_score < min_null_score: + min_null_score = feature_null_score + + # Go through all possibilities for the `n_start_top`/`n_end_top` greater start and end logits. 
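+            # end_top_index / end_top_log_probs are flattened per start candidate: for the i-th best
+            # start position, its j-th best end position is stored at index i * end_n_top + j, which
+            # is what the j_index computed below accounts for.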
+ for i in range(start_n_top): + for j in range(end_n_top): + start_index = int(start_indexes[i]) + j_index = i * end_n_top + j + end_index = int(end_indexes[j_index]) + # Don't consider out-of-scope answers (last part of the test should be unnecessary because of the + # p_mask but let's not take any risk) + if ( + start_index >= len(offset_mapping) + or end_index >= len(offset_mapping) + or offset_mapping[start_index] is None + or offset_mapping[end_index] is None + ): + continue + # Don't consider answers with a length negative or > max_answer_length. + if end_index < start_index or end_index - start_index + 1 > max_answer_length: + continue + # Don't consider answer that don't have the maximum context available (if such information is + # provided). + if token_is_max_context is not None and not token_is_max_context.get(str(start_index), False): + continue + prelim_predictions.append( + { + "offsets": (offset_mapping[start_index][0], offset_mapping[end_index][1]), + "score": start_log_prob[i] + end_log_prob[j_index], + "start_log_prob": start_log_prob[i], + "end_log_prob": end_log_prob[j_index], + } + ) + + # Only keep the best `n_best_size` predictions. + predictions = sorted(prelim_predictions, key=lambda x: x["score"], reverse=True)[:n_best_size] + + # Use the offsets to gather the answer text in the original context. + context = example["context"] + for pred in predictions: + offsets = pred.pop("offsets") + pred["text"] = context[offsets[0] : offsets[1]] + + # In the very rare edge case we have not a single non-null prediction, we create a fake prediction to avoid + # failure. + if len(predictions) == 0: + predictions.insert(0, {"text": "", "start_logit": -1e-6, "end_logit": -1e-6, "score": -2e-6}) + + # Compute the softmax of all scores (we do it with numpy to stay independent from torch/tf in this file, using + # the LogSumExp trick). + scores = np.array([pred.pop("score") for pred in predictions]) + exp_scores = np.exp(scores - np.max(scores)) + probs = exp_scores / exp_scores.sum() + + # Include the probabilities in our predictions. + for prob, pred in zip(probs, predictions): + pred["probability"] = prob + + # Pick the best prediction and set the probability for the null answer. + all_predictions[example["id"]] = predictions[0]["text"] + if version_2_with_negative: + scores_diff_json[example["id"]] = float(min_null_score) + + # Make `predictions` JSON-serializable by casting np.float back to float. + all_nbest_json[example["id"]] = [ + {k: (float(v) if isinstance(v, (np.float16, np.float32, np.float64)) else v) for k, v in pred.items()} + for pred in predictions + ] + + # If we have an output_dir, let's save all those dicts. 
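+    # Up to three files are written: predictions.json (the best answer per example),
+    # nbest_predictions.json (the n-best candidates with their scores), and, when
+    # version_2_with_negative is True, null_odds.json (the per-example null scores returned
+    # alongside the predictions). An optional `prefix` is prepended to each file name.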
+ if output_dir is not None: + if not os.path.isdir(output_dir): + raise EnvironmentError(f"{output_dir} is not a directory.") + + prediction_file = os.path.join( + output_dir, "predictions.json" if prefix is None else f"{prefix}_predictions.json" + ) + nbest_file = os.path.join( + output_dir, "nbest_predictions.json" if prefix is None else f"{prefix}_nbest_predictions.json" + ) + if version_2_with_negative: + null_odds_file = os.path.join( + output_dir, "null_odds.json" if prefix is None else f"{prefix}_null_odds.json" + ) + + logger.info(f"Saving predictions to {prediction_file}.") + with open(prediction_file, "w") as writer: + writer.write(json.dumps(all_predictions, indent=4) + "\n") + logger.info(f"Saving nbest_preds to {nbest_file}.") + with open(nbest_file, "w") as writer: + writer.write(json.dumps(all_nbest_json, indent=4) + "\n") + if version_2_with_negative: + logger.info(f"Saving null_odds to {null_odds_file}.") + with open(null_odds_file, "w") as writer: + writer.write(json.dumps(scores_diff_json, indent=4) + "\n") + return all_predictions, scores_diff_json \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/eval.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/eval.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/eval.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/eval.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/main.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/main.py diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/prepare_data.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/prepare_data.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/prepare_data.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/prepare_data.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/requirements.txt rename to 
examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/requirements.txt diff --git a/examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/run.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/run.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_classification/mix_precision/run.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_classification/mix_precision/run.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/main.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/main.py diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/README.md similarity 
index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/main.py diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/gptj/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/main.py diff --git 
a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prepare_model.py diff --git a/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json new file mode 100644 index 00000000000..09aae772b44 --- /dev/null +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json @@ -0,0 +1,7 @@ +{ + "32": "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun", + "512": "It is done, and submitted. You can play 'Survival of the Tastiest' on Android, and on the web. Playing on the web works, but you have to simulate multiple touch for table moving and that can be a bit confusing. There is a lot I'd like to talk about. I will go through every topic, instead of making the typical what went right/wrong list. Concept Working over the theme was probably one of the hardest tasks which I had to face. Originally, I had an idea of what kind of game I wanted to develop, gameplay wise - something with a lot of enemies/actors, simple graphics, maybe set in space, controlled from a top-down view. I was confident that I could fit any theme around it. In the end, the problem with a theme like 'Evolution' in a game is that evolution is unassisted. It happens through several seemingly random mutations over time, with the most apt permutation surviving. This genetic car simulator is, in my opinion, a great example of actual evolution of a species facing a challenge. But is it a game? In a game, you need to control something to reach an objective. That control goes against what evolution is supposed to be like. If you allow the user to pick how to evolve something, it's not evolution anymore - it's the equivalent of intelligent design, the fable invented by creationists to combat the idea of evolution. Being agnostic and a Pastafarian, that's not something that rubbed me the right way. Hence, my biggest dilemma when deciding what to create was not with what I wanted to create, but with what I did not. I didn't want to create an 'intelligent design' simulator and wrongly call it evolution. This is a problem, of course, every other contestant also had to face. And judging by the entries submitted, not many managed to work around it. I'd say the only real solution was through the use of artificial selection, somehow. So far, I have not seen any entry using this at its core gameplay. Alas, this is just a fun competition and after a while I decided not to be as strict with the game idea, and allowed myself to pick whatever I thought would work out. My initial idea was to create something where humanity tried to evolve to a next level", + "1024": "It is done, and submitted. You can play 'Survival of the Tastiest' on Android, and on the web. Playing on the web works, but you have to simulate multiple touch for table moving and that can be a bit confusing. There is a lot I'd like to talk about. 
I will go through every topic, instead of making the typical what went right/wrong list. Concept Working over the theme was probably one of the hardest tasks which I had to face. Originally, I had an idea of what kind of game I wanted to develop, gameplay wise - something with a lot of enemies/actors, simple graphics, maybe set in space, controlled from a top-down view. I was confident that I could fit any theme around it. In the end, the problem with a theme like 'Evolution' in a game is that evolution is unassisted. It happens through several seemingly random mutations over time, with the most apt permutation surviving. This genetic car simulator is, in my opinion, a great example of actual evolution of a species facing a challenge. But is it a game? In a game, you need to control something to reach an objective. That control goes against what evolution is supposed to be like. If you allow the user to pick how to evolve something, it's not evolution anymore - it's the equivalent of intelligent design, the fable invented by creationists to combat the idea of evolution. Being agnostic and a Pastafarian, that's not something that rubbed me the right way. Hence, my biggest dilemma when deciding what to create was not with what I wanted to create, but with what I did not. I didn't want to create an 'intelligent design' simulator and wrongly call it evolution. This is a problem, of course, every other contestant also had to face. And judging by the entries submitted, not many managed to work around it. I'd say the only real solution was through the use of artificial selection, somehow. So far, I haven't seen any entry using this at its core gameplay. Alas, this is just a fun competition and after a while I decided not to be as strict with the game idea, and allowed myself to pick whatever I thought would work out. My initial idea was to create something where humanity tried to evolve to a next level, but had some kind of foe trying to stop them from doing so. I kind of had this image of human souls flying in space towards a monolith or a space baby (all based in 2001: A Space Odyssey of course) but I couldn't think of compelling (read: serious) mechanics for that. Borgs were my next inspiration, as their whole hypothesis fit pretty well into the evolution theme. But how to make it work? Are you the borg, or fighting the Borg? The third and final idea came to me through my girlfriend, who somehow gave me the idea of making something about the evolution of Pasta. The more I thought about it the more it sounded like it would work, so I decided to go with it. Conversations with my inspiring co-worker Roushey (who also created the 'Mechanical Underdogs' signature logo for my intros) further matured the concept, as it involved into the idea of having individual pieces of pasta flying around and trying to evolve until they became all-powerful. A secondary idea here was that the game would work to explain how the Flying Spaghetti Monster came to exist - by evolving from a normal dinner table. So the idea evolved more or less into this: you are sitting a table. You have your own plate, with is your 'base'. There are 5 other guests at the table, each with their own plate. Your plate can spawn little pieces of pasta. You do so by 'ordering' them through a menu. Some pastas are better than others; some are faster, some are stronger. They have varying 'costs', which are debited from your credits (you start with a number of credits). Once spawned, your pastas start flying around. 
Their instinct is to fly to other plates, in order to conquer them (the objective of the game is having your pasta conquer all the plates on the table). But they are really autonomous, so after being spawned, you have no control over your pasta (think data or LoL creeps). Your pasta doesn't like other people's pasta, so if they meet, they shoot sauce at each other until one dies. You get credits for other pastas your own pasta kill. Once a pasta is in vicinity of a plate", + "2017": "It is done, and submitted. You can play 'Survival of the Tastiest' on Android, and on the web. Playing on the web works, but you have to simulate multiple touch for table moving and that can be a bit confusing. There is a lot I'd like to talk about. I will go through every topic, instead of making the typical what went right/wrong list. Concept Working over the theme was probably one of the hardest tasks which I had to face. Originally, I had an idea of what kind of game I wanted to develop, gameplay wise - something with a lot of enemies/actors, simple graphics, maybe set in space, controlled from a top-down view. I was confident that I could fit any theme around it. In the end, the problem with a theme like 'Evolution' in a game is that evolution is unassisted. It happens through several seemingly random mutations over time, with the most apt permutation surviving. This genetic car simulator is, in my opinion, a great example of actual evolution of a species facing a challenge. But is it a game? In a game, you need to control something to reach an objective. That control goes against what evolution is supposed to be like. If you allow the user to pick how to evolve something, it's not evolution anymore - it's the equivalent of intelligent design, the fable invented by creationists to combat the idea of evolution. Being agnostic and a Pastafarian, that's not something that rubbed me the right way. Hence, my biggest dilemma when deciding what to create was not with what I wanted to create, but with what I did not. I didn't want to create an 'intelligent design' simulator and wrongly call it evolution. This is a problem, of course, every other contestant also had to face. And judging by the entries submitted, not many managed to work around it. I'd say the only real solution was through the use of artificial selection, somehow. So far, I have not seen any entry using this at its core gameplay. Alas, this is just a fun competition and after a while I decided not to be as strict with the game idea, and allowed myself to pick whatever I thought would work out. My initial idea was to create something where humanity tried to evolve to a next level but had some kind of foe trying to stop them from doing so. I kind of had this image of human souls flying in space towards a monolith or a space baby (all based in 2001: A Space Odyssey of course) but I couldn't think of compelling (read: serious) mechanics for that. Borgs were my next inspiration, as their whole hypothesis fit pretty well into the evolution theme. But how to make it work? Are you the borg, or fighting the Borg? The third and final idea came to me through my girlfriend, who somehow gave me the idea of making something about the evolution of Pasta. The more I thought about it the more it sounded like it would work, so I decided to go with it. 
Conversations with my inspiring co-worker Roushey (who also created the 'Mechanical Underdogs' signature logo for my intros) further matured the concept, as it involved into the idea of having individual pieces of pasta flying around and trying to evolve until they became all-powerful. A secondary idea here was that the game would work to explain how the Flying Spaghetti Monster came to exist - by evolving from a normal dinner table. So the idea evolved more or less into this: you are sitting a table. You have your own plate, with is your 'base'. There are 5 other guests at the table, each with their own plate. Your plate can spawn little pieces of pasta. You do so by 'ordering' them through a menu. Some pastas are better than others; some are faster, some are stronger. They have varying 'costs', which are debited from your credits (you start with a number of credits). Once spawned, your pastas start flying around. Their instinct is to fly to other plates, in order to conquer them (the objective of the game is having your pasta conquer all the plates on the table). But they are really autonomous, so after being spawned, you have no control over your pasta (think data or LoL creeps). Your pasta doesn't like other people's pasta, so if they meet, they shoot sauce at each other until one dies. You get credits for other pastas your own pasta kill. Once a pasta is in the vicinity of a plate, it starts conquering it for its team. It takes around 10 seconds for a plate to be conquered; less if more pasta from the same team are around. If pasta from other team are around, though, they get locked down in their attempt, unable to conquer the plate, until one of them die (think Battlefield's standard 'Conquest' mode). You get points every second for every plate you own. Over time, the concept also evolved to use an Italian bistro as its main scenario. Carlos, Carlos' Bistro's founder and owner Setup No major changes were made from my work setup. I used FDT and Starling creating an Adobe AIR (ActionScript) project, all tools or frameworks I already had some knowledge with. One big change for me was that I livestreamed my work through a twitch.tv account. This was a new thing for me. As recommended by Roushey, I used a program called XSplit and I got to say, it is pretty amazing. It made the livestream pretty effortless and the features are awesome, even for the free version. It was great to have some of my friends watch me, and then interact with them and random people through chat. It was also good knowing that I was also recording a local version of the files, so I could make a timelapse video later. Knowing the video was being recorded also made me a lot more self-conscious about my computer use, as if someone was watching over my shoulder. It made me realize that sometimes I spend too much time in seemingly inane tasks (I ended up wasting the longest time just to get some text alignment the way I wanted - it'll probably drive someone crazy if they watch it) and that I do way too many typos where writing code. I pretty much spend half of the time writing a line and the other half fixing the crazy characters in it. My own stream was probably boring to watch since I was coding for the most time. But livestreaming is one of the cool things to do as a spectator too. It was great seeing other people working - I had a few tabs opened on my second monitor all the time. It's actually a bit sad, because if I could, I could have spent the whole weekend just watching other people working! 
But I had to do my own work, so I'd only do it once in a while, when resting for a bit. Design Although I wanted some simple, low-fi, high-contrast kind of design, I ended up going with somewhat realistic (vector) art. I think it worked very well, fitting the mood of the game, but I also went overboard. For example: to know the state of a plate (who owns it, who's conquering it and how much time they have left before conquering it, which pasta units are in the queue, etc), you have to look at the plate's bill. The problem I realized when doing some tests is that people never look at the bill! They think it's some kind of prop, so they never actually read its details. Plus, if you're zoomed out too much, you can't actually read it, so it's hard to know what's going on with the game until you zoom in to the area of a specific plate. One other solution that didn't turn out to be as perfect as I thought was how to indicate who a plate base belongs to. In the game, that's indicated by the plate's decoration - its color denotes the team owner. But it's something that fits so well into the design that people never realized it, until they were told about it. In the end, the idea of going with a full physical metaphor is one that should be done with care. Things that are very important risk becoming background noise, unless the player knows its importance. Originally, I wanted to avoid any kind of heads-up display in my game. In the end, I ended up adding it at the bottom to indicate your credits and bases owned, as well as the hideous out-of-place-and-still-not-obvious 'Call Waiter' button. But in hindsight, I should have gone with a simple HUD from the start, especially one that indicated each team's colors and general state of the game without the need for zooming in and out. Development Development went fast. But not fast enough. Even though I worked around 32+ hours for this Ludum Dare, the biggest problem that I had to face in the end was overscoping. I had too much planned,", + "2048": "It is done, and submitted. You can play 'Survival of the Tastiest' on Android, and on the web. Playing on the web works, but you have to simulate multiple touch for table moving and that can be a bit confusing. There is a lot I'd like to talk about. I will go through every topic, instead of making the typical what went right/wrong list. Concept Working over the theme was probably one of the hardest tasks which I had to face. Originally, I had an idea of what kind of game I wanted to develop, gameplay wise - something with a lot of enemies/actors, simple graphics, maybe set in space, controlled from a top-down view. I was confident that I could fit any theme around it. In the end, the problem with a theme like 'Evolution' in a game is that evolution is unassisted. It happens through several seemingly random mutations over time, with the most apt permutation surviving. This genetic car simulator is, in my opinion, a great example of actual evolution of a species facing a challenge. But is it a game? In a game, you need to control something to reach an objective. That control goes against what evolution is supposed to be like. If you allow the user to pick how to evolve something, it's not evolution anymore - it's the equivalent of intelligent design, the fable invented by creationists to combat the idea of evolution. Being agnostic and a Pastafarian, that's not something that rubbed me the right way. 
Hence, my biggest dilemma when deciding what to create was not with what I wanted to create, but with what I did not. I didn't want to create an 'intelligent design' simulator and wrongly call it evolution. This is a problem, of course, every other contestant also had to face. And judging by the entries submitted, not many managed to work around it. I'd say the only real solution was through the use of artificial selection, somehow. So far, I have not seen any entry using this at its core gameplay. Alas, this is just a fun competition and after a while I decided not to be as strict with the game idea, and allowed myself to pick whatever I thought would work out. My initial idea was to create something where humanity tried to evolve to a next level but had some kind of foe trying to stop them from doing so. I kind of had this image of human souls flying in space towards a monolith or a space baby (all based in 2001: A Space Odyssey of course) but I couldn't think of compelling (read: serious) mechanics for that. Borgs were my next inspiration, as their whole hypothesis fit pretty well into the evolution theme. But how to make it work? Are you the borg, or fighting the Borg? The third and final idea came to me through my girlfriend, who somehow gave me the idea of making something about the evolution of Pasta. The more I thought about it the more it sounded like it would work, so I decided to go with it. Conversations with my inspiring co-worker Roushey (who also created the 'Mechanical Underdogs' signature logo for my intros) further matured the concept, as it involved into the idea of having individual pieces of pasta flying around and trying to evolve until they became all-powerful. A secondary idea here was that the game would work to explain how the Flying Spaghetti Monster came to exist - by evolving from a normal dinner table. So the idea evolved more or less into this: you are sitting a table. You have your own plate, with is your 'base'. There are 5 other guests at the table, each with their own plate. Your plate can spawn little pieces of pasta. You do so by 'ordering' them through a menu. Some pastas are better than others; some are faster, some are stronger. They have varying 'costs', which are debited from your credits (you start with a number of credits). Once spawned, your pastas start flying around. Their instinct is to fly to other plates, in order to conquer them (the objective of the game is having your pasta conquer all the plates on the table). But they are really autonomous, so after being spawned, you have no control over your pasta (think data or LoL creeps). Your pasta doesn't like other people's pasta, so if they meet, they shoot sauce at each other until one dies. You get credits for other pastas your own pasta kill. Once a pasta is in the vicinity of a plate, it starts conquering it for its team. It takes around 10 seconds for a plate to be conquered; less if more pasta from the same team are around. If pasta from other team are around, though, they get locked down in their attempt, unable to conquer the plate, until one of them die (think Battlefield's standard 'Conquest' mode). You get points every second for every plate you own. Over time, the concept also evolved to use an Italian bistro as its main scenario. Carlos, Carlos' Bistro's founder and owner Setup No major changes were made from my work setup. I used FDT and Starling creating an Adobe AIR (ActionScript) project, all tools or frameworks I already had some knowledge with. 
One big change for me was that I livestreamed my work through a twitch.tv account. This was a new thing for me. As recommended by Roushey, I used a program called XSplit and I got to say, it is pretty amazing. It made the livestream pretty effortless and the features are awesome, even for the free version. It was great to have some of my friends watch me, and then interact with them and random people through chat. It was also good knowing that I was also recording a local version of the files, so I could make a timelapse video later. Knowing the video was being recorded also made me a lot more self-conscious about my computer use, as if someone was watching over my shoulder. It made me realize that sometimes I spend too much time in seemingly inane tasks (I ended up wasting the longest time just to get some text alignment the way I wanted - it'll probably drive someone crazy if they watch it) and that I do way too many typos where writing code. I pretty much spend half of the time writing a line and the other half fixing the crazy characters in it. My own stream was probably boring to watch since I was coding for the most time. But livestreaming is one of the cool things to do as a spectator too. It was great seeing other people working - I had a few tabs opened on my second monitor all the time. It's actually a bit sad, because if I could, I could have spent the whole weekend just watching other people working! But I had to do my own work, so I'd only do it once in a while, when resting for a bit. Design Although I wanted some simple, low-fi, high-contrast kind of design, I ended up going with somewhat realistic (vector) art. I think it worked very well, fitting the mood of the game, but I also went overboard. For example: to know the state of a plate (who owns it, who's conquering it and how much time they have left before conquering it, which pasta units are in the queue, etc), you have to look at the plate's bill. The problem I realized when doing some tests is that people never look at the bill! They think it's some kind of prop, so they never actually read its details. Plus, if you're zoomed out too much, you can't actually read it, so it's hard to know what's going on with the game until you zoom in to the area of a specific plate. One other solution that didn't turn out to be as perfect as I thought was how to indicate who a plate base belongs to. In the game, that's indicated by the plate's decoration - its color denotes the team owner. But it's something that fits so well into the design that people never realized it, until they were told about it. In the end, the idea of going with a full physical metaphor is one that should be done with care. Things that are very important risk becoming background noise, unless the player knows its importance. Originally, I wanted to avoid any kind of heads-up display in my game. In the end, I ended up adding it at the bottom to indicate your credits and bases owned, as well as the hideous out-of-place-and-still-not-obvious 'Call Waiter' button. But in hindsight, I should have gone with a simple HUD from the start, especially one that indicated each team's colors and general state of the game without the need for zooming in and out. Development Development went fast. But not fast enough. Even though I worked around 32+ hours for this Ludum Dare, the biggest problem that I had to face in the end was overscoping. I had too much planned, and couldn't get it all done. 
Content-wise, I had several kinds of pasta planned - Wikipedia is just amazing in that regard," +} diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/main.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/main.py diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/requirements.txt diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/run_benchmark.sh 
b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/weight_only/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/funsd.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/funsd.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/funsd.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/funsd.py index 445df84c199..a232f552133 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/funsd.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/funsd.py @@ -1,116 +1,116 @@ -# coding=utf-8 - -import json -import os - -import datasets - -from utils import load_image, normalize_bbox - - -logger = datasets.logging.get_logger(__name__) - - -_CITATION = """\ -@article{Jaume2019FUNSDAD, - title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, - author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, - journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, - year={2019}, - volume={2}, - pages={1-6} -} -""" - -_DESCRIPTION = """\ -https://guillaumejaume.github.io/FUNSD/ -""" - - -class FunsdConfig(datasets.BuilderConfig): - """BuilderConfig for FUNSD""" - - def __init__(self, **kwargs): - """BuilderConfig for FUNSD. - - Args: - **kwargs: keyword arguments forwarded to super. 
- """ - super(FunsdConfig, self).__init__(**kwargs) - - -class Funsd(datasets.GeneratorBasedBuilder): - """Conll2003 dataset.""" - - BUILDER_CONFIGS = [ - FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "tokens": datasets.Sequence(datasets.Value("string")), - "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] - ) - ), - "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), - } - ), - supervised_keys=None, - homepage="https://guillaumejaume.github.io/FUNSD/", - citation=_CITATION, - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} - ), - ] - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - ann_dir = os.path.join(filepath, "annotations") - img_dir = os.path.join(filepath, "images") - for guid, file in enumerate(sorted(os.listdir(ann_dir))): - tokens = [] - bboxes = [] - ner_tags = [] - - file_path = os.path.join(ann_dir, file) - with open(file_path, "r", encoding="utf8") as f: - data = json.load(f) - image_path = os.path.join(img_dir, file) - image_path = image_path.replace("json", "png") - image, size = load_image(image_path) - for item in data["form"]: - words, label = item["words"], item["label"] - words = [w for w in words if w["text"].strip() != ""] - if len(words) == 0: - continue - if label == "other": - for w in words: - tokens.append(w["text"]) - ner_tags.append("O") - bboxes.append(normalize_bbox(w["box"], size)) - else: - tokens.append(words[0]["text"]) - ner_tags.append("B-" + label.upper()) - bboxes.append(normalize_bbox(words[0]["box"], size)) - for w in words[1:]: - tokens.append(w["text"]) - ner_tags.append("I-" + label.upper()) - bboxes.append(normalize_bbox(w["box"], size)) - +# coding=utf-8 + +import json +import os + +import datasets + +from utils import load_image, normalize_bbox + + +logger = datasets.logging.get_logger(__name__) + + +_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" + +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. 
+ """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """Conll2003 dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "tokens": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + tokens = [] + bboxes = [] + ner_tags = [] + + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + words, label = item["words"], item["label"] + words = [w for w in words if w["text"].strip() != ""] + if len(words) == 0: + continue + if label == "other": + for w in words: + tokens.append(w["text"]) + ner_tags.append("O") + bboxes.append(normalize_bbox(w["box"], size)) + else: + tokens.append(words[0]["text"]) + ner_tags.append("B-" + label.upper()) + bboxes.append(normalize_bbox(words[0]["box"], size)) + for w in words[1:]: + tokens.append(w["text"]) + ner_tags.append("I-" + label.upper()) + bboxes.append(normalize_bbox(w["box"], size)) + yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, "image": image} \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/main.py index 7c790576167..551e3f47730 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/main.py @@ -1,468 +1,468 @@ -#!/usr/bin/env python -# coding=utf-8 - -import logging -import os -import sys -from dataclasses import dataclass, field -from typing import Optional - -import numpy as np -from datasets import 
ClassLabel, load_dataset, load_metric - -import funsd -import transformers -from utils import DataCollatorForKeyValueExtraction, DataTrainingArguments -from trainer import FunsdTrainer as Trainer -from transformers import ( - AutoConfig, - AutoModelForTokenClassification, - AutoTokenizer, - HfArgumentParser, - PreTrainedTokenizerFast, - TrainingArguments, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version -import onnxruntime -import onnx -from neural_compressor.data import DataLoader - -import pyarrow_hotfix; pyarrow_hotfix.uninstall() - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.5.0") - -logger = logging.getLogger(__name__) - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." 
- }, - ) - input_model: str = field( - default=None, - metadata={"help": "Path to onnx model"} - ) - tune: bool = field( - default=False, - metadata={"help": ("INC tune")}, - ) - benchmark: bool = field( - default=False, - metadata={"help": ("INC benchmark")}, - ) - mode: str = field( - default='performance', - metadata={"help": ("INC benchmark mode")}, - ) - num_heads: int = field( - default=12, - metadata={"help": ("onnx model optimize num_heads")}, - ) - hidden_size: int = field( - default=768, - metadata={"help": ("onnx model optimize hidden_size")}, - ) - save_path: str = field( - default=None, - metadata={"help": ("onnx int8 model path")}, - ) - batch_size: int = field( - default=1, - metadata={"help": ("batch size for benchmark")}, - ) - - -class IncDataset(): - def __init__(self, - dataset, - model, - label_names=None,): - self.dataset = dataset - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self._process_dataset() - - def _process_dataset(self): - self.label = [] - self.onnx_inputs = [] - for inputs in self.dataset: - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - self.label.append(labels) - self.onnx_inputs.append([np.array(inputs[key]) for key in self.onnx_input_names if key in inputs]) - - def __getitem__(self, index): - return tuple(self.onnx_inputs[index]), self.label[index] - - def __len__(self): - assert len(self.label) == len(self.onnx_inputs) - return len(self.onnx_inputs) - -def main(): - # See all possible arguments in layoutlmft/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - elif last_checkpoint is not None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." 
- ) - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - # Set the verbosity to info of the Transformers logger (on main process only): - if is_main_process(training_args.local_rank): - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - logger.info(f"Training/evaluation parameters {training_args}") - - # Set seed before initializing model. - set_seed(training_args.seed) - - datasets = load_dataset(os.path.abspath(funsd.__file__)) - if training_args.do_train: - column_names = datasets["train"].column_names - features = datasets["train"].features - else: - column_names = datasets["test"].column_names - features = datasets["test"].features - text_column_name = "tokens" if "tokens" in column_names else column_names[0] - label_column_name = ( - f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] - ) - - remove_columns = column_names - - # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the - # unique labels. - def get_label_list(labels): - unique_labels = set() - for label in labels: - unique_labels = unique_labels | set(label) - label_list = list(unique_labels) - label_list.sort() - return label_list - - if isinstance(features[label_column_name].feature, ClassLabel): - label_list = features[label_column_name].feature.names - # No need to convert the labels since they are already ints. - label_to_id = {i: i for i in range(len(label_list))} - else: - label_list = get_label_list(datasets["train"][label_column_name]) - label_to_id = {l: i for i, l in enumerate(label_list)} - num_labels = len(label_list) - - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. - config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - num_labels=num_labels, - finetuning_task=data_args.task_name, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - use_fast=True, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - model = AutoModelForTokenClassification.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - use_safetensors = False, - ) - - # Tokenizer check: this script requires a fast tokenizer. 
- if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " - "requirement" - ) - - # Preprocessing the dataset - # Padding strategy - padding = "max_length" if data_args.pad_to_max_length else False - - # Tokenize all texts and align the labels with them. - def tokenize_and_align_labels(examples): - tokenized_inputs = tokenizer( - examples[text_column_name], - padding=padding, - truncation=True, - return_overflowing_tokens=True, - # We use this argument because the texts in our dataset are lists of words (with a label for each word). - is_split_into_words=True, - ) - - labels = [] - bboxes = [] - images = [] - for batch_index in range(len(tokenized_inputs["input_ids"])): - word_ids = tokenized_inputs.word_ids(batch_index=batch_index) - org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] - - label = examples[label_column_name][org_batch_index] - bbox = examples["bboxes"][org_batch_index] - image = examples["image"][org_batch_index] - previous_word_idx = None - label_ids = [] - bbox_inputs = [] - for word_idx in word_ids: - # Special tokens have a word id that is None. We set the label to -100 so they are automatically - # ignored in the loss function. - if word_idx is None: - label_ids.append(-100) - bbox_inputs.append([0, 0, 0, 0]) - # We set the label for the first token of each word. - elif word_idx != previous_word_idx: - label_ids.append(label_to_id[label[word_idx]]) - bbox_inputs.append(bbox[word_idx]) - # For the other tokens in a word, we set the label to either the current label or -100, depending on - # the label_all_tokens flag. 
- else: - label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) - bbox_inputs.append(bbox[word_idx]) - previous_word_idx = word_idx - labels.append(label_ids) - bboxes.append(bbox_inputs) - images.append(image) - tokenized_inputs["labels"] = labels - tokenized_inputs["bbox"] = bboxes - tokenized_inputs["image"] = images - return tokenized_inputs - - if training_args.do_train: - if "train" not in datasets: - raise ValueError("--do_train requires a train dataset") - train_dataset = datasets["train"] - if data_args.max_train_samples is not None: - train_dataset = train_dataset.select(range(data_args.max_train_samples)) - train_dataset = train_dataset.map( - tokenize_and_align_labels, - batched=True, - remove_columns=remove_columns, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) - - if training_args.do_eval: - if "test" not in datasets: - raise ValueError("--do_eval requires a test dataset") - test_dataset = datasets["test"] - if data_args.max_test_samples is not None: - test_dataset = test_dataset.select(range(data_args.max_test_samples)) - test_dataset = test_dataset.map( - tokenize_and_align_labels, - batched=True, - remove_columns=remove_columns, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) - - # Data collator - data_collator = DataCollatorForKeyValueExtraction( - tokenizer, - pad_to_multiple_of=8 if training_args.fp16 else None, - padding=padding, - max_length=512, - ) - - # Metrics - metric = load_metric("seqeval") - - def compute_metrics(p): - predictions, labels = p - predictions = np.argmax(predictions, axis=2) - - # Remove ignored index (special tokens) - true_predictions = [ - [label_list[p] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - true_labels = [ - [label_list[l] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - - results = metric.compute(predictions=true_predictions, references=true_labels) - if data_args.return_entity_level_metrics: - # Unpack nested dictionaries - final_results = {} - for key, value in results.items(): - if isinstance(value, dict): - for n, v in value.items(): - final_results[f"{key}_{n}"] = v - else: - final_results[key] = value - return final_results - else: - return { - "precision": results["overall_precision"], - "recall": results["overall_recall"], - "f1": results["overall_f1"], - "accuracy": results["overall_accuracy"], - } - - # Initialize our Trainer - trainer = Trainer( - model=model, - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=test_dataset if training_args.do_eval else None, - tokenizer=tokenizer, - data_collator=data_collator, - compute_metrics=compute_metrics, - ) - - # Training - if training_args.do_train: - checkpoint = last_checkpoint if last_checkpoint else None - train_result = trainer.train(resume_from_checkpoint=checkpoint) - metrics = train_result.metrics - trainer.save_model() # Saves the tokenizer too for easy upload - - max_train_samples = ( - data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) - ) - metrics["train_samples"] = min(max_train_samples, len(train_dataset)) - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - from model import ORTModel - def 
eval_func(model): - logger.info("*** Evaluate ***") - ort_model = ORTModel( - model, - compute_metrics=compute_metrics, - ) - outputs = ort_model.evaluation_loop(test_dataset) - return outputs.metrics['f1'] - - if model_args.tune: - # optimize model - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - opt_options = FusionOptions('bert') - - model_optimizer = optimizer.optimize_model( - model_args.input_model, - 'bert', - num_heads=12, - hidden_size=768, - optimization_options=opt_options) - onnx_model = model_optimizer.model - - # check the optimized model is valid - try: - onnxruntime.InferenceSession(model.SerializeToString(), providers=onnxruntime.get_available_providers()) - except Exception as e: - logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. " \ - "Try to upgrade onnxruntime to avoid this error") - onnx_model = onnx.load(model_args.input_model) - - from neural_compressor import quantization, PostTrainingQuantConfig - - config = PostTrainingQuantConfig(approach='dynamic') - q_model = quantization.fit(onnx_model, - config, - eval_func=eval_func) - q_model.save(model_args.save_path) - if model_args.benchmark: - onnx_model = onnx.load(model_args.input_model) - if model_args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - b_dataset = IncDataset(test_dataset, onnx_model) - conf = BenchmarkConfig(iteration=100, - cores_per_instance=28, - num_of_instance=1) - b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) - fit(onnx_model, conf, b_dataloader=b_dataloader) - elif model_args.mode == 'accuracy': - eval_f1 = eval_func(onnx_model) - print("Batch size = %d" % model_args.batch_size) - print("Accuracy: %.5f" % eval_f1) - -if __name__ == "__main__": - main() +#!/usr/bin/env python +# coding=utf-8 + +import logging +import os +import sys +from dataclasses import dataclass, field +from typing import Optional + +import numpy as np +from datasets import ClassLabel, load_dataset, load_metric + +import funsd +import transformers +from utils import DataCollatorForKeyValueExtraction, DataTrainingArguments +from trainer import FunsdTrainer as Trainer +from transformers import ( + AutoConfig, + AutoModelForTokenClassification, + AutoTokenizer, + HfArgumentParser, + PreTrainedTokenizerFast, + TrainingArguments, + set_seed, +) +from transformers.trainer_utils import get_last_checkpoint, is_main_process +from transformers.utils import check_min_version +import onnxruntime +import onnx +from neural_compressor.data import DataLoader + +import pyarrow_hotfix; pyarrow_hotfix.uninstall() + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.5.0") + +logger = logging.getLogger(__name__) + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + input_model: str = field( + default=None, + metadata={"help": "Path to onnx model"} + ) + tune: bool = field( + default=False, + metadata={"help": ("INC tune")}, + ) + benchmark: bool = field( + default=False, + metadata={"help": ("INC benchmark")}, + ) + mode: str = field( + default='performance', + metadata={"help": ("INC benchmark mode")}, + ) + num_heads: int = field( + default=12, + metadata={"help": ("onnx model optimize num_heads")}, + ) + hidden_size: int = field( + default=768, + metadata={"help": ("onnx model optimize hidden_size")}, + ) + save_path: str = field( + default=None, + metadata={"help": ("onnx int8 model path")}, + ) + batch_size: int = field( + default=1, + metadata={"help": ("batch size for benchmark")}, + ) + + +class IncDataset(): + def __init__(self, + dataset, + model, + label_names=None,): + self.dataset = dataset + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + self._process_dataset() + + def _process_dataset(self): + self.label = [] + self.onnx_inputs = [] + for inputs in self.dataset: + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + self.label.append(labels) + self.onnx_inputs.append([np.array(inputs[key]) for key in self.onnx_input_names if key in inputs]) + + def __getitem__(self, index): + return tuple(self.onnx_inputs[index]), self.label[index] + + def __len__(self): + assert len(self.label) == len(self.onnx_inputs) + return len(self.onnx_inputs) + +def main(): + # See all possible arguments in layoutlmft/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Detecting last checkpoint. 
+ last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + elif last_checkpoint is not None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." + ) + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + # Set the verbosity to info of the Transformers logger (on main process only): + if is_main_process(training_args.local_rank): + transformers.utils.logging.set_verbosity_info() + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + logger.info(f"Training/evaluation parameters {training_args}") + + # Set seed before initializing model. + set_seed(training_args.seed) + + datasets = load_dataset(os.path.abspath(funsd.__file__)) + if training_args.do_train: + column_names = datasets["train"].column_names + features = datasets["train"].features + else: + column_names = datasets["test"].column_names + features = datasets["test"].features + text_column_name = "tokens" if "tokens" in column_names else column_names[0] + label_column_name = ( + f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] + ) + + remove_columns = column_names + + # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the + # unique labels. + def get_label_list(labels): + unique_labels = set() + for label in labels: + unique_labels = unique_labels | set(label) + label_list = list(unique_labels) + label_list.sort() + return label_list + + if isinstance(features[label_column_name].feature, ClassLabel): + label_list = features[label_column_name].feature.names + # No need to convert the labels since they are already ints. + label_to_id = {i: i for i in range(len(label_list))} + else: + label_list = get_label_list(datasets["train"][label_column_name]) + label_to_id = {l: i for i, l in enumerate(label_list)} + num_labels = len(label_list) + + # Load pretrained model and tokenizer + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. 
+ config = AutoConfig.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + num_labels=num_labels, + finetuning_task=data_args.task_name, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + use_fast=True, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + model = AutoModelForTokenClassification.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + use_safetensors = False, + ) + + # Tokenizer check: this script requires a fast tokenizer. + if not isinstance(tokenizer, PreTrainedTokenizerFast): + raise ValueError( + "This example script only works for models that have a fast tokenizer. Checkout the big table of models " + "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " + "requirement" + ) + + # Preprocessing the dataset + # Padding strategy + padding = "max_length" if data_args.pad_to_max_length else False + + # Tokenize all texts and align the labels with them. + def tokenize_and_align_labels(examples): + tokenized_inputs = tokenizer( + examples[text_column_name], + padding=padding, + truncation=True, + return_overflowing_tokens=True, + # We use this argument because the texts in our dataset are lists of words (with a label for each word). + is_split_into_words=True, + ) + + labels = [] + bboxes = [] + images = [] + for batch_index in range(len(tokenized_inputs["input_ids"])): + word_ids = tokenized_inputs.word_ids(batch_index=batch_index) + org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] + + label = examples[label_column_name][org_batch_index] + bbox = examples["bboxes"][org_batch_index] + image = examples["image"][org_batch_index] + previous_word_idx = None + label_ids = [] + bbox_inputs = [] + for word_idx in word_ids: + # Special tokens have a word id that is None. We set the label to -100 so they are automatically + # ignored in the loss function. + if word_idx is None: + label_ids.append(-100) + bbox_inputs.append([0, 0, 0, 0]) + # We set the label for the first token of each word. + elif word_idx != previous_word_idx: + label_ids.append(label_to_id[label[word_idx]]) + bbox_inputs.append(bbox[word_idx]) + # For the other tokens in a word, we set the label to either the current label or -100, depending on + # the label_all_tokens flag. 
+ else: + label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) + bbox_inputs.append(bbox[word_idx]) + previous_word_idx = word_idx + labels.append(label_ids) + bboxes.append(bbox_inputs) + images.append(image) + tokenized_inputs["labels"] = labels + tokenized_inputs["bbox"] = bboxes + tokenized_inputs["image"] = images + return tokenized_inputs + + if training_args.do_train: + if "train" not in datasets: + raise ValueError("--do_train requires a train dataset") + train_dataset = datasets["train"] + if data_args.max_train_samples is not None: + train_dataset = train_dataset.select(range(data_args.max_train_samples)) + train_dataset = train_dataset.map( + tokenize_and_align_labels, + batched=True, + remove_columns=remove_columns, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) + + if training_args.do_eval: + if "test" not in datasets: + raise ValueError("--do_eval requires a test dataset") + test_dataset = datasets["test"] + if data_args.max_test_samples is not None: + test_dataset = test_dataset.select(range(data_args.max_test_samples)) + test_dataset = test_dataset.map( + tokenize_and_align_labels, + batched=True, + remove_columns=remove_columns, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) + + # Data collator + data_collator = DataCollatorForKeyValueExtraction( + tokenizer, + pad_to_multiple_of=8 if training_args.fp16 else None, + padding=padding, + max_length=512, + ) + + # Metrics + metric = load_metric("seqeval") + + def compute_metrics(p): + predictions, labels = p + predictions = np.argmax(predictions, axis=2) + + # Remove ignored index (special tokens) + true_predictions = [ + [label_list[p] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + true_labels = [ + [label_list[l] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + + results = metric.compute(predictions=true_predictions, references=true_labels) + if data_args.return_entity_level_metrics: + # Unpack nested dictionaries + final_results = {} + for key, value in results.items(): + if isinstance(value, dict): + for n, v in value.items(): + final_results[f"{key}_{n}"] = v + else: + final_results[key] = value + return final_results + else: + return { + "precision": results["overall_precision"], + "recall": results["overall_recall"], + "f1": results["overall_f1"], + "accuracy": results["overall_accuracy"], + } + + # Initialize our Trainer + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset if training_args.do_train else None, + eval_dataset=test_dataset if training_args.do_eval else None, + tokenizer=tokenizer, + data_collator=data_collator, + compute_metrics=compute_metrics, + ) + + # Training + if training_args.do_train: + checkpoint = last_checkpoint if last_checkpoint else None + train_result = trainer.train(resume_from_checkpoint=checkpoint) + metrics = train_result.metrics + trainer.save_model() # Saves the tokenizer too for easy upload + + max_train_samples = ( + data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) + ) + metrics["train_samples"] = min(max_train_samples, len(train_dataset)) + + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + # Evaluation + if training_args.do_eval: + from model import ORTModel + def 
eval_func(model): + logger.info("*** Evaluate ***") + ort_model = ORTModel( + model, + compute_metrics=compute_metrics, + ) + outputs = ort_model.evaluation_loop(test_dataset) + return outputs.metrics['f1'] + + if model_args.tune: + # optimize model + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('bert') + + model_optimizer = optimizer.optimize_model( + model_args.input_model, + 'bert', + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + onnx_model = model_optimizer.model + + # check the optimized model is valid + try: + onnxruntime.InferenceSession(model.SerializeToString(), providers=onnxruntime.get_available_providers()) + except Exception as e: + logger.warning("Optimized model is invalid: {}. ".format(e)) + logger.warning("Model optimizer will be skipped. " \ + "Try to upgrade onnxruntime to avoid this error") + onnx_model = onnx.load(model_args.input_model) + + from neural_compressor import quantization, PostTrainingQuantConfig + + config = PostTrainingQuantConfig(approach='dynamic') + q_model = quantization.fit(onnx_model, + config, + eval_func=eval_func) + q_model.save(model_args.save_path) + if model_args.benchmark: + onnx_model = onnx.load(model_args.input_model) + if model_args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + b_dataset = IncDataset(test_dataset, onnx_model) + conf = BenchmarkConfig(iteration=100, + cores_per_instance=28, + num_of_instance=1) + b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) + fit(onnx_model, conf, b_dataloader=b_dataloader) + elif model_args.mode == 'accuracy': + eval_f1 = eval_func(onnx_model) + print("Batch size = %d" % model_args.batch_size) + print("Accuracy: %.5f" % eval_f1) + +if __name__ == "__main__": + main() diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/model.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/model.py index 0a6c03e7c21..9e94ca0f56f 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/model.py @@ -1,80 +1,80 @@ -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
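
For quick reference, the INC flow exercised by main.py above reduces to a few neural_compressor calls: load the exported FP32 ONNX model, wrap the seqeval-based evaluation in an eval_func, and let quantization.fit apply dynamic post-training quantization. The sketch below is a minimal approximation of that path; the model path and the eval_func body are placeholders, not part of this patch.

# Minimal sketch of the dynamic PTQ path used by main.py above.
# "layoutlm-fp32.onnx" and the eval_func stub are placeholders, not from this patch.
import onnx
from neural_compressor import PostTrainingQuantConfig, quantization

onnx_model = onnx.load("layoutlm-fp32.onnx")   # exported FP32 LayoutLM model (placeholder path)

def eval_func(model):
    # main.py wraps ORTModel.evaluation_loop here and returns the seqeval F1.
    return 1.0                                 # stub so the sketch stays self-contained

config = PostTrainingQuantConfig(approach="dynamic")
q_model = quantization.fit(onnx_model, config, eval_func=eval_func)
q_model.save("layoutlm-int8.onnx")             # same save API main.py uses for --save_path

For the performance mode, main.py instead builds an IncDataset, wraps it in neural_compressor.data.DataLoader(framework='onnxruntime', ...), and passes it to neural_compressor.benchmark.fit together with a BenchmarkConfig, as shown in the benchmark branch above.
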
- -import logging -import os -from typing import Callable, Dict, List, Optional, Union - -import numpy as np -from datasets import Dataset -from transformers import EvalPrediction -from transformers.trainer_pt_utils import nested_concat -from transformers.trainer_utils import EvalLoopOutput -import onnxruntime - -logger = logging.getLogger(__name__) - -class ORTModel: - def __init__( - self, - model: Union[str, os.PathLike], - compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, - label_names: Optional[List[str]] = None, - ): - """ - Args: - model_path (`Union[str, os.PathLike]`): - The path to the model ONNX Intermediate Representation (IR). - compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): - The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and - return a dictionary string to metric values. - label_names (`List[str]`, `optional`): - The list of keys in your dictionary of inputs that correspond to the labels. - """ - self.compute_metrics = compute_metrics - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - - def evaluation_loop(self, dataset: Dataset): - """ - Run evaluation and returns metrics and predictions. - - Args: - dataset (`datasets.Dataset`): - Dataset to use for the evaluation step. - """ - logger.info("***** Running evaluation *****") - all_preds = None - all_labels = None - for step, inputs in enumerate(dataset): - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - onnx_inputs = {key: np.array([inputs[key]]) for key in self.onnx_input_names if key in inputs} - preds = self.session.run(None, onnx_inputs) - if len(preds) == 1: - preds = preds[0] - all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) - all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) - if self.compute_metrics is not None and all_preds is not None and all_labels is not None: - metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) - else: - metrics = {} - return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset)) +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
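
The ORTModel helper shown above drives accuracy evaluation by running the serialized ONNX graph one example at a time through ONNX Runtime and concatenating the logits. A minimal sketch of that per-example inference pattern follows; the model path and the feature dict are illustrative only (the real FUNSD test set also carries bbox and image tensors).

# Minimal sketch of the inference pattern used by ORTModel.evaluation_loop above.
import numpy as np
import onnx
import onnxruntime

model = onnx.load("layoutlm-int8.onnx")        # placeholder path
session = onnxruntime.InferenceSession(
    model.SerializeToString(), providers=onnxruntime.get_available_providers()
)
input_names = [i.name for i in session.get_inputs()]

features = {"input_ids": [101, 102], "attention_mask": [1, 1]}   # illustrative example only
onnx_inputs = {k: np.array([features[k]]) for k in input_names if k in features}
logits = session.run(None, onnx_inputs)[0]     # token-classification logits
pred_ids = logits.argmax(axis=-1)              # what compute_metrics consumes in the example
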
+ +import logging +import os +from typing import Callable, Dict, List, Optional, Union + +import numpy as np +from datasets import Dataset +from transformers import EvalPrediction +from transformers.trainer_pt_utils import nested_concat +from transformers.trainer_utils import EvalLoopOutput +import onnxruntime + +logger = logging.getLogger(__name__) + +class ORTModel: + def __init__( + self, + model: Union[str, os.PathLike], + compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, + label_names: Optional[List[str]] = None, + ): + """ + Args: + model_path (`Union[str, os.PathLike]`): + The path to the model ONNX Intermediate Representation (IR). + compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): + The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and + return a dictionary string to metric values. + label_names (`List[str]`, `optional`): + The list of keys in your dictionary of inputs that correspond to the labels. + """ + self.compute_metrics = compute_metrics + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + + def evaluation_loop(self, dataset: Dataset): + """ + Run evaluation and returns metrics and predictions. + + Args: + dataset (`datasets.Dataset`): + Dataset to use for the evaluation step. + """ + logger.info("***** Running evaluation *****") + all_preds = None + all_labels = None + for step, inputs in enumerate(dataset): + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + onnx_inputs = {key: np.array([inputs[key]]) for key in self.onnx_input_names if key in inputs} + preds = self.session.run(None, onnx_inputs) + if len(preds) == 1: + preds = preds[0] + all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) + all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) + if self.compute_metrics is not None and all_preds is not None and all_labels is not None: + metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) + else: + metrics = {} + return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset)) diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/requirements.txt similarity index 92% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/requirements.txt rename to 
examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/requirements.txt index 0510bcb4b39..54266b7289d 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/requirements.txt @@ -1,20 +1,20 @@ -accelerate -datasets -transformers -huggingface-hub -seqeval -tensorboard -sentencepiece -timm -fvcore -Pillow -einops -textdistance -shapely -protobuf -torch -torchvision -setuptools -onnx -onnxruntime +accelerate +datasets +transformers +huggingface-hub +seqeval +tensorboard +sentencepiece +timm +fvcore +Pillow +einops +textdistance +shapely +protobuf +torch +torchvision +setuptools +onnx +onnxruntime onnxruntime-extensions; python_version < '3.11' \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/trainer.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/trainer.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/trainer.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/trainer.py index 0a2f88390f8..7379ae60f55 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/trainer.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/trainer.py @@ -1,21 +1,21 @@ -from typing import Any, Dict, Union - -import torch - -from transformers import Trainer - - -class FunsdTrainer(Trainer): - def _prepare_inputs(self, inputs: Dict[str, Union[torch.Tensor, Any]]) -> Dict[str, Union[torch.Tensor, Any]]: - """ - Prepare :obj:`inputs` before feeding them to the model, converting them to tensors if they are not already and - handling potential state. 
- """ - for k, v in inputs.items(): - if hasattr(v, "to") and hasattr(v, "device"): - inputs[k] = v.to(self.args.device) - - if self.args.past_index >= 0 and self._past is not None: - inputs["mems"] = self._past - +from typing import Any, Dict, Union + +import torch + +from transformers import Trainer + + +class FunsdTrainer(Trainer): + def _prepare_inputs(self, inputs: Dict[str, Union[torch.Tensor, Any]]) -> Dict[str, Union[torch.Tensor, Any]]: + """ + Prepare :obj:`inputs` before feeding them to the model, converting them to tensors if they are not already and + handling potential state. + """ + for k, v in inputs.items(): + if hasattr(v, "to") and hasattr(v, "device"): + inputs[k] = v.to(self.args.device) + + if self.args.past_index >= 0 and self._past is not None: + inputs["mems"] = self._past + return inputs \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/utils.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/utils.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/utils.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/utils.py index c77f3e46317..f7ff0396188 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/utils.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/utils.py @@ -1,506 +1,506 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from __future__ import division -from typing import Any, Dict, List, Optional, Tuple -import torch -from torch import device -from torch.nn import functional as F -from dataclasses import dataclass, field -from typing import Optional, Union -from transformers import PreTrainedTokenizerBase -from transformers.file_utils import PaddingStrategy -import numpy as np -from PIL import Image - -from fvcore.transforms.transform import TransformList, Transform - -from iopath.common.file_io import PathManager as PathManagerBase -PathManager = PathManagerBase() - -# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 -_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] - -# https://www.exiv2.org/tags.html -_EXIF_ORIENT = 274 # exif 'Orientation' tag - - -@torch.jit.script_if_tracing -def move_device_like(src: torch.Tensor, dst: torch.Tensor) -> torch.Tensor: - """ - Tracing friendly way to cast tensor to another tensor's device. Device will be treated - as constant during tracing, scripting the casting process as whole can workaround this issue. - """ - return src.to(dst.device) - -def shapes_to_tensor(x: List[int], device: Optional[torch.device] = None) -> torch.Tensor: - """ - Turn a list of integer scalars or integer Tensor scalars into a vector, - in a way that's both traceable and scriptable. - - In tracing, `x` should be a list of scalar Tensor, so the output can trace to the inputs. - In scripting or eager, `x` should be a list of int. - """ - if torch.jit.is_scripting(): - return torch.as_tensor(x, device=device) - if torch.jit.is_tracing(): - assert all( - [isinstance(t, torch.Tensor) for t in x] - ), "Shape should be tensor during tracing!" 
- # as_tensor should not be used in tracing because it records a constant - ret = torch.stack(x) - if ret.device != device: # avoid recording a hard-coded device if not necessary - ret = ret.to(device=device) - return ret - return torch.as_tensor(x, device=device) - -class ImageList(object): - """ - Structure that holds a list of images (of possibly - varying sizes) as a single tensor. - This works by padding the images to the same size. - The original sizes of each image is stored in `image_sizes`. - - Attributes: - image_sizes (list[tuple[int, int]]): each tuple is (h, w). - During tracing, it becomes list[Tensor] instead. - """ - - def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): - """ - Arguments: - tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 - image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can - be smaller than (H, W) due to padding. - """ - self.tensor = tensor - self.image_sizes = image_sizes - - def __len__(self) -> int: - return len(self.image_sizes) - - def __getitem__(self, idx) -> torch.Tensor: - """ - Access the individual image in its original size. - - Args: - idx: int or slice - - Returns: - Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 - """ - size = self.image_sizes[idx] - return self.tensor[idx, ..., : size[0], : size[1]] - - @torch.jit.unused - def to(self, *args: Any, **kwargs: Any) -> "ImageList": - cast_tensor = self.tensor.to(*args, **kwargs) - return ImageList(cast_tensor, self.image_sizes) - - @property - def device(self) -> device: - return self.tensor.device - - @staticmethod - def from_tensors( - tensors: List[torch.Tensor], - size_divisibility: int = 0, - pad_value: float = 0.0, - padding_constraints: Optional[Dict[str, int]] = None, - ) -> "ImageList": - """ - Args: - tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or - (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded - to the same shape with `pad_value`. - size_divisibility (int): If `size_divisibility > 0`, add padding to ensure - the common height and width is divisible by `size_divisibility`. - This depends on the model and many models need a divisibility of 32. - pad_value (float): value to pad. - padding_constraints (optional[Dict]): If given, it would follow the format as - {"size_divisibility": int, "square_size": int}, where `size_divisibility` will - overwrite the above one if presented and `square_size` indicates the - square padding size if `square_size` > 0. - Returns: - an `ImageList`. - """ - assert len(tensors) > 0 - assert isinstance(tensors, (tuple, list)) - for t in tensors: - assert isinstance(t, torch.Tensor), type(t) - assert t.shape[:-2] == tensors[0].shape[:-2], t.shape - - image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] - image_sizes_tensor = [shapes_to_tensor(x) for x in image_sizes] - max_size = torch.stack(image_sizes_tensor).max(0).values - - if padding_constraints is not None: - square_size = padding_constraints.get("square_size", 0) - if square_size > 0: - # pad to square. - max_size[0] = max_size[1] = square_size - if "size_divisibility" in padding_constraints: - size_divisibility = padding_constraints["size_divisibility"] - if size_divisibility > 1: - stride = size_divisibility - # the last two dims are H,W, both subject to divisibility requirement - max_size = (max_size + (stride - 1)).div(stride, rounding_mode="floor") * stride - - # handle weirdness of scripting and tracing ... 
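
One detail worth noting in ImageList.from_tensors above: when size_divisibility is set (the data collator later passes 32), the batch's maximum height and width are rounded up to the next multiple of that stride before padding. A tiny worked example with illustrative sizes, not taken from this patch:

import torch

max_size = torch.tensor([217, 334])      # largest (H, W) in the batch -- illustrative values
stride = 32                              # size_divisibility used by the collator
max_size = (max_size + (stride - 1)).div(stride, rounding_mode="floor") * stride
assert max_size.tolist() == [224, 352]   # every image in the batch is padded to 224x352
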
- if torch.jit.is_scripting(): - max_size: List[int] = max_size.to(dtype=torch.long).tolist() - else: - if torch.jit.is_tracing(): - image_sizes = image_sizes_tensor - - if len(tensors) == 1: - # This seems slightly (2%) faster. - # TODO: check whether it's faster for multiple images as well - image_size = image_sizes[0] - padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] - batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) - else: - # max_size can be a tensor in tracing mode, therefore convert to list - batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) - device = ( - None if torch.jit.is_scripting() else ("cpu" if torch.jit.is_tracing() else None) - ) - batched_imgs = tensors[0].new_full(batch_shape, pad_value, device=device) - batched_imgs = move_device_like(batched_imgs, tensors[0]) - for i, img in enumerate(tensors): - # Use `batched_imgs` directly instead of `img, pad_img = zip(tensors, batched_imgs)` - # Tracing mode cannot capture `copy_()` of temporary locals - batched_imgs[i, ..., : img.shape[-2], : img.shape[-1]].copy_(img) - - return ImageList(batched_imgs.contiguous(), image_sizes) - -def convert_PIL_to_numpy(image, format): - """ - Convert PIL image to numpy array of target format. - - Args: - image (PIL.Image): a PIL image - format (str): the format of output image - - Returns: - (np.ndarray): also see `read_image` - """ - if format is not None: - # PIL only supports RGB, so convert to RGB and flip channels over below - conversion_format = format - if format in ["BGR", "YUV-BT.601"]: - conversion_format = "RGB" - image = image.convert(conversion_format) - image = np.asarray(image) - # PIL squeezes out the channel dimension for "L", so make it HWC - if format == "L": - image = np.expand_dims(image, -1) - - # handle formats not supported by PIL - elif format == "BGR": - # flip channels if needed - image = image[:, :, ::-1] - elif format == "YUV-BT.601": - image = image / 255.0 - image = np.dot(image, np.array(_M_RGB2YUV).T) - - return image - -def _apply_exif_orientation(image): - """ - Applies the exif orientation correctly. - - This code exists per the bug: - https://github.com/python-pillow/Pillow/issues/3973 - with the function `ImageOps.exif_transpose`. The Pillow source raises errors with - various methods, especially `tobytes` - - Function based on: - https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 - https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 - - Args: - image (PIL.Image): a PIL image - - Returns: - (PIL.Image): the PIL image with exif orientation applied, if applicable - """ - if not hasattr(image, "getexif"): - return image - - try: - exif = image.getexif() - except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 - exif = None - - if exif is None: - return image - - orientation = exif.get(_EXIF_ORIENT) - - method = { - 2: Image.FLIP_LEFT_RIGHT, - 3: Image.ROTATE_180, - 4: Image.FLIP_TOP_BOTTOM, - 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, - 7: Image.TRANSVERSE, - 8: Image.ROTATE_90, - }.get(orientation) - - if method is not None: - return image.transpose(method) - return image - -def read_image(file_name, format=None): - """ - Read an image into the given format. - Will apply rotation and flipping if the image has such exif information. - - Args: - file_name (str): image file path - format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". 
- - Returns: - image (np.ndarray): - an HWC image in the given format, which is 0-255, uint8 for - supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. - """ - with PathManager.open(file_name, "rb") as f: - image = Image.open(f) - - # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 - image = _apply_exif_orientation(image) - return convert_PIL_to_numpy(image, format) - -class ResizeTransform(Transform): - """ - Resize the image to a target size. - """ - - def __init__(self, h, w, new_h, new_w, interp=None): - """ - Args: - h, w (int): original image size - new_h, new_w (int): new image size - interp: PIL interpolation methods, defaults to bilinear. - """ - # TODO decide on PIL vs opencv - super().__init__() - if interp is None: - interp = Image.BILINEAR - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - assert img.shape[:2] == (self.h, self.w) - assert len(img.shape) <= 4 - interp_method = interp if interp is not None else self.interp - - if img.dtype == np.uint8: - if len(img.shape) > 2 and img.shape[2] == 1: - pil_image = Image.fromarray(img[:, :, 0], mode="L") - else: - pil_image = Image.fromarray(img) - pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) - ret = np.asarray(pil_image) - if len(img.shape) > 2 and img.shape[2] == 1: - ret = np.expand_dims(ret, -1) - else: - # PIL only supports uint8 - if any(x < 0 for x in img.strides): - img = np.ascontiguousarray(img) - img = torch.from_numpy(img) - shape = list(img.shape) - shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] - img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw - _PIL_RESIZE_TO_INTERPOLATE_MODE = { - Image.NEAREST: "nearest", - Image.BILINEAR: "bilinear", - Image.BICUBIC: "bicubic", - } - mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] - align_corners = None if mode == "nearest" else False - img = F.interpolate( - img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners - ) - shape[:2] = (self.new_h, self.new_w) - ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) - - return ret - - def apply_coords(self, coords): - coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) - coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) - return coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - def inverse(self): - return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) - -@dataclass -class DataCollatorForKeyValueExtraction: - """ - Data collator that will dynamically pad the inputs received, as well as the labels. - - Args: - tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`): - The tokenizer used for encoding the data. - padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`True`): - Select a strategy to pad the returned sequences (according to the model's padding side and padding index) - among: - - * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single - sequence if provided). - * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the - maximum acceptable input length for the model if that argument is not provided. - * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of - different lengths). 
- max_length (:obj:`int`, `optional`): - Maximum length of the returned list and optionally padding length (see above). - pad_to_multiple_of (:obj:`int`, `optional`): - If set will pad the sequence to a multiple of the provided value. - - This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= - 7.5 (Volta). - label_pad_token_id (:obj:`int`, `optional`, defaults to -100): - The id to use when padding the labels (-100 will be automatically ignore by PyTorch loss functions). - """ - - tokenizer: PreTrainedTokenizerBase - padding: Union[bool, str, PaddingStrategy] = True - max_length: Optional[int] = None - pad_to_multiple_of: Optional[int] = None - label_pad_token_id: int = -100 - - def __call__(self, features): - label_name = "label" if "label" in features[0].keys() else "labels" - labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None - - has_image_input = "image" in features[0] - has_bbox_input = "bbox" in features[0] - if has_image_input: - image = ImageList.from_tensors([torch.tensor(feature["image"]) for feature in features], 32) - for feature in features: - del feature["image"] - batch = self.tokenizer.pad( - features, - padding=self.padding, - max_length=self.max_length, - pad_to_multiple_of=self.pad_to_multiple_of, - # Conversion to tensors will fail if we have labels as they are not of the same length yet. - return_tensors="pt" if labels is None else None, - ) - - if labels is None: - return batch - - sequence_length = torch.tensor(batch["input_ids"]).shape[1] - padding_side = self.tokenizer.padding_side - if padding_side == "right": - batch["labels"] = [label + [self.label_pad_token_id] * (sequence_length - len(label)) for label in labels] - if has_bbox_input: - batch["bbox"] = [bbox + [[0, 0, 0, 0]] * (sequence_length - len(bbox)) for bbox in batch["bbox"]] - else: - batch["labels"] = [[self.label_pad_token_id] * (sequence_length - len(label)) + label for label in labels] - if has_bbox_input: - batch["bbox"] = [[[0, 0, 0, 0]] * (sequence_length - len(bbox)) + bbox for bbox in batch["bbox"]] - - batch = {k: torch.tensor(v, dtype=torch.int64) if isinstance(v[0], list) else v for k, v in batch.items()} - if has_image_input: - batch["image"] = image - return batch - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. 
- """ - - task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field( - default=None, metadata={"help": "The input training data file (a csv or JSON file)."} - ) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_val_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." - }, - ) - max_test_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of test examples to this " - "value if set." - }, - ) - label_all_tokens: bool = field( - default=False, - metadata={ - "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " - "one (in which case the other tokens will have a padding index)." - }, - ) - return_entity_level_metrics: bool = field( - default=False, - metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, - ) - - -@dataclass -class XFUNDataTrainingArguments(DataTrainingArguments): - lang: Optional[str] = field(default="en") - additional_langs: Optional[str] = field(default=None) - -def normalize_bbox(bbox, size): - return [ - int(1000 * bbox[0] / size[0]), - int(1000 * bbox[1] / size[1]), - int(1000 * bbox[2] / size[0]), - int(1000 * bbox[3] / size[1]), - ] - -def load_image(image_path): - image = read_image(image_path, format="BGR") - h = image.shape[0] - w = image.shape[1] - img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) - image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable +# Copyright (c) Facebook, Inc. and its affiliates. 
+from __future__ import division +from typing import Any, Dict, List, Optional, Tuple +import torch +from torch import device +from torch.nn import functional as F +from dataclasses import dataclass, field +from typing import Optional, Union +from transformers import PreTrainedTokenizerBase +from transformers.file_utils import PaddingStrategy +import numpy as np +from PIL import Image + +from fvcore.transforms.transform import TransformList, Transform + +from iopath.common.file_io import PathManager as PathManagerBase +PathManager = PathManagerBase() + +# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 +_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] + +# https://www.exiv2.org/tags.html +_EXIF_ORIENT = 274 # exif 'Orientation' tag + + +@torch.jit.script_if_tracing +def move_device_like(src: torch.Tensor, dst: torch.Tensor) -> torch.Tensor: + """ + Tracing friendly way to cast tensor to another tensor's device. Device will be treated + as constant during tracing, scripting the casting process as whole can workaround this issue. + """ + return src.to(dst.device) + +def shapes_to_tensor(x: List[int], device: Optional[torch.device] = None) -> torch.Tensor: + """ + Turn a list of integer scalars or integer Tensor scalars into a vector, + in a way that's both traceable and scriptable. + + In tracing, `x` should be a list of scalar Tensor, so the output can trace to the inputs. + In scripting or eager, `x` should be a list of int. + """ + if torch.jit.is_scripting(): + return torch.as_tensor(x, device=device) + if torch.jit.is_tracing(): + assert all( + [isinstance(t, torch.Tensor) for t in x] + ), "Shape should be tensor during tracing!" + # as_tensor should not be used in tracing because it records a constant + ret = torch.stack(x) + if ret.device != device: # avoid recording a hard-coded device if not necessary + ret = ret.to(device=device) + return ret + return torch.as_tensor(x, device=device) + +class ImageList(object): + """ + Structure that holds a list of images (of possibly + varying sizes) as a single tensor. + This works by padding the images to the same size. + The original sizes of each image is stored in `image_sizes`. + + Attributes: + image_sizes (list[tuple[int, int]]): each tuple is (h, w). + During tracing, it becomes list[Tensor] instead. + """ + + def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): + """ + Arguments: + tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 + image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can + be smaller than (H, W) due to padding. + """ + self.tensor = tensor + self.image_sizes = image_sizes + + def __len__(self) -> int: + return len(self.image_sizes) + + def __getitem__(self, idx) -> torch.Tensor: + """ + Access the individual image in its original size. 
+ + Args: + idx: int or slice + + Returns: + Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 + """ + size = self.image_sizes[idx] + return self.tensor[idx, ..., : size[0], : size[1]] + + @torch.jit.unused + def to(self, *args: Any, **kwargs: Any) -> "ImageList": + cast_tensor = self.tensor.to(*args, **kwargs) + return ImageList(cast_tensor, self.image_sizes) + + @property + def device(self) -> device: + return self.tensor.device + + @staticmethod + def from_tensors( + tensors: List[torch.Tensor], + size_divisibility: int = 0, + pad_value: float = 0.0, + padding_constraints: Optional[Dict[str, int]] = None, + ) -> "ImageList": + """ + Args: + tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or + (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded + to the same shape with `pad_value`. + size_divisibility (int): If `size_divisibility > 0`, add padding to ensure + the common height and width is divisible by `size_divisibility`. + This depends on the model and many models need a divisibility of 32. + pad_value (float): value to pad. + padding_constraints (optional[Dict]): If given, it would follow the format as + {"size_divisibility": int, "square_size": int}, where `size_divisibility` will + overwrite the above one if presented and `square_size` indicates the + square padding size if `square_size` > 0. + Returns: + an `ImageList`. + """ + assert len(tensors) > 0 + assert isinstance(tensors, (tuple, list)) + for t in tensors: + assert isinstance(t, torch.Tensor), type(t) + assert t.shape[:-2] == tensors[0].shape[:-2], t.shape + + image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] + image_sizes_tensor = [shapes_to_tensor(x) for x in image_sizes] + max_size = torch.stack(image_sizes_tensor).max(0).values + + if padding_constraints is not None: + square_size = padding_constraints.get("square_size", 0) + if square_size > 0: + # pad to square. + max_size[0] = max_size[1] = square_size + if "size_divisibility" in padding_constraints: + size_divisibility = padding_constraints["size_divisibility"] + if size_divisibility > 1: + stride = size_divisibility + # the last two dims are H,W, both subject to divisibility requirement + max_size = (max_size + (stride - 1)).div(stride, rounding_mode="floor") * stride + + # handle weirdness of scripting and tracing ... + if torch.jit.is_scripting(): + max_size: List[int] = max_size.to(dtype=torch.long).tolist() + else: + if torch.jit.is_tracing(): + image_sizes = image_sizes_tensor + + if len(tensors) == 1: + # This seems slightly (2%) faster. 
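For orientation, a hypothetical usage of `ImageList.from_tensors` as defined above (not part of the original file): two images of different sizes are padded into one batched tensor whose spatial dimensions are divisible by 32, while `image_sizes` keeps the original shapes.

```python
import torch

# Assumes the ImageList class defined above is in scope.
imgs = [torch.rand(3, 480, 601), torch.rand(3, 300, 401)]
batch = ImageList.from_tensors(imgs, size_divisibility=32)
print(batch.tensor.shape)  # torch.Size([2, 3, 480, 608])
print(batch.image_sizes)   # [(480, 601), (300, 401)]
print(batch[1].shape)      # unpadded view of the second image: torch.Size([3, 300, 401])
```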
+ # TODO: check whether it's faster for multiple images as well + image_size = image_sizes[0] + padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] + batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) + else: + # max_size can be a tensor in tracing mode, therefore convert to list + batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) + device = ( + None if torch.jit.is_scripting() else ("cpu" if torch.jit.is_tracing() else None) + ) + batched_imgs = tensors[0].new_full(batch_shape, pad_value, device=device) + batched_imgs = move_device_like(batched_imgs, tensors[0]) + for i, img in enumerate(tensors): + # Use `batched_imgs` directly instead of `img, pad_img = zip(tensors, batched_imgs)` + # Tracing mode cannot capture `copy_()` of temporary locals + batched_imgs[i, ..., : img.shape[-2], : img.shape[-1]].copy_(img) + + return ImageList(batched_imgs.contiguous(), image_sizes) + +def convert_PIL_to_numpy(image, format): + """ + Convert PIL image to numpy array of target format. + + Args: + image (PIL.Image): a PIL image + format (str): the format of output image + + Returns: + (np.ndarray): also see `read_image` + """ + if format is not None: + # PIL only supports RGB, so convert to RGB and flip channels over below + conversion_format = format + if format in ["BGR", "YUV-BT.601"]: + conversion_format = "RGB" + image = image.convert(conversion_format) + image = np.asarray(image) + # PIL squeezes out the channel dimension for "L", so make it HWC + if format == "L": + image = np.expand_dims(image, -1) + + # handle formats not supported by PIL + elif format == "BGR": + # flip channels if needed + image = image[:, :, ::-1] + elif format == "YUV-BT.601": + image = image / 255.0 + image = np.dot(image, np.array(_M_RGB2YUV).T) + + return image + +def _apply_exif_orientation(image): + """ + Applies the exif orientation correctly. + + This code exists per the bug: + https://github.com/python-pillow/Pillow/issues/3973 + with the function `ImageOps.exif_transpose`. The Pillow source raises errors with + various methods, especially `tobytes` + + Function based on: + https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 + https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 + + Args: + image (PIL.Image): a PIL image + + Returns: + (PIL.Image): the PIL image with exif orientation applied, if applicable + """ + if not hasattr(image, "getexif"): + return image + + try: + exif = image.getexif() + except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 + exif = None + + if exif is None: + return image + + orientation = exif.get(_EXIF_ORIENT) + + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + + if method is not None: + return image.transpose(method) + return image + +def read_image(file_name, format=None): + """ + Read an image into the given format. + Will apply rotation and flipping if the image has such exif information. + + Args: + file_name (str): image file path + format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". + + Returns: + image (np.ndarray): + an HWC image in the given format, which is 0-255, uint8 for + supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. 
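As a quick, hypothetical illustration of the format handling in `convert_PIL_to_numpy` above (not part of the patch): requesting "BGR" converts through RGB and then reverses the channel axis, so a solid red image comes back with the red value in the last channel.

```python
from PIL import Image

# Assumes convert_PIL_to_numpy defined above is in scope.
img = Image.new("RGB", (2, 2), color=(255, 0, 0))  # synthetic 2x2 red image
arr = convert_PIL_to_numpy(img, "BGR")
print(arr.shape, arr[0, 0].tolist())  # (2, 2, 3) [0, 0, 255]
```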
+ """ + with PathManager.open(file_name, "rb") as f: + image = Image.open(f) + + # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 + image = _apply_exif_orientation(image) + return convert_PIL_to_numpy(image, format) + +class ResizeTransform(Transform): + """ + Resize the image to a target size. + """ + + def __init__(self, h, w, new_h, new_w, interp=None): + """ + Args: + h, w (int): original image size + new_h, new_w (int): new image size + interp: PIL interpolation methods, defaults to bilinear. + """ + # TODO decide on PIL vs opencv + super().__init__() + if interp is None: + interp = Image.BILINEAR + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + assert img.shape[:2] == (self.h, self.w) + assert len(img.shape) <= 4 + interp_method = interp if interp is not None else self.interp + + if img.dtype == np.uint8: + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + else: + # PIL only supports uint8 + if any(x < 0 for x in img.strides): + img = np.ascontiguousarray(img) + img = torch.from_numpy(img) + shape = list(img.shape) + shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] + img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw + _PIL_RESIZE_TO_INTERPOLATE_MODE = { + Image.NEAREST: "nearest", + Image.BILINEAR: "bilinear", + Image.BICUBIC: "bicubic", + } + mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] + align_corners = None if mode == "nearest" else False + img = F.interpolate( + img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners + ) + shape[:2] = (self.new_h, self.new_w) + ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) + + return ret + + def apply_coords(self, coords): + coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) + coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) + return coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + def inverse(self): + return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) + +@dataclass +class DataCollatorForKeyValueExtraction: + """ + Data collator that will dynamically pad the inputs received, as well as the labels. + + Args: + tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`): + The tokenizer used for encoding the data. + padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned sequences (according to the model's padding side and padding index) + among: + + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + max_length (:obj:`int`, `optional`): + Maximum length of the returned list and optionally padding length (see above). 
+ pad_to_multiple_of (:obj:`int`, `optional`): + If set will pad the sequence to a multiple of the provided value. + + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + label_pad_token_id (:obj:`int`, `optional`, defaults to -100): + The id to use when padding the labels (-100 will be automatically ignore by PyTorch loss functions). + """ + + tokenizer: PreTrainedTokenizerBase + padding: Union[bool, str, PaddingStrategy] = True + max_length: Optional[int] = None + pad_to_multiple_of: Optional[int] = None + label_pad_token_id: int = -100 + + def __call__(self, features): + label_name = "label" if "label" in features[0].keys() else "labels" + labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None + + has_image_input = "image" in features[0] + has_bbox_input = "bbox" in features[0] + if has_image_input: + image = ImageList.from_tensors([torch.tensor(feature["image"]) for feature in features], 32) + for feature in features: + del feature["image"] + batch = self.tokenizer.pad( + features, + padding=self.padding, + max_length=self.max_length, + pad_to_multiple_of=self.pad_to_multiple_of, + # Conversion to tensors will fail if we have labels as they are not of the same length yet. + return_tensors="pt" if labels is None else None, + ) + + if labels is None: + return batch + + sequence_length = torch.tensor(batch["input_ids"]).shape[1] + padding_side = self.tokenizer.padding_side + if padding_side == "right": + batch["labels"] = [label + [self.label_pad_token_id] * (sequence_length - len(label)) for label in labels] + if has_bbox_input: + batch["bbox"] = [bbox + [[0, 0, 0, 0]] * (sequence_length - len(bbox)) for bbox in batch["bbox"]] + else: + batch["labels"] = [[self.label_pad_token_id] * (sequence_length - len(label)) + label for label in labels] + if has_bbox_input: + batch["bbox"] = [[[0, 0, 0, 0]] * (sequence_length - len(bbox)) + bbox for bbox in batch["bbox"]] + + batch = {k: torch.tensor(v, dtype=torch.int64) if isinstance(v[0], list) else v for k, v in batch.items()} + if has_image_input: + batch["image"] = image + return batch + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
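The collator above pads labels (and, when present, bounding boxes) manually because `tokenizer.pad` only handles the tokenizer's own fields. A minimal sketch of the right-padding branch of `DataCollatorForKeyValueExtraction.__call__`, with made-up values:

```python
# Right-pad per-example label lists to the batch sequence length with -100,
# mirroring the padding_side == "right" branch above.
label_pad_token_id = -100
sequence_length = 6
labels = [[1, 2, 3], [4, 5]]
padded = [label + [label_pad_token_id] * (sequence_length - len(label)) for label in labels]
print(padded)  # [[1, 2, 3, -100, -100, -100], [4, 5, -100, -100, -100, -100]]
```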
+ """ + + task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field( + default=None, metadata={"help": "The input training data file (a csv or JSON file)."} + ) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_val_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + label_all_tokens: bool = field( + default=False, + metadata={ + "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." 
+ }, + ) + return_entity_level_metrics: bool = field( + default=False, + metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, + ) + + +@dataclass +class XFUNDataTrainingArguments(DataTrainingArguments): + lang: Optional[str] = field(default="en") + additional_langs: Optional[str] = field(default=None) + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + +def load_image(image_path): + image = read_image(image_path, format="BGR") + h = image.shape[0] + w = image.shape[1] + img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) + image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable return image, (w, h) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/funsd.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/funsd.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/funsd.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/funsd.py index 445df84c199..a232f552133 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/funsd.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/funsd.py @@ -1,116 +1,116 @@ -# coding=utf-8 - -import json -import os - -import datasets - -from utils import load_image, normalize_bbox - - -logger = datasets.logging.get_logger(__name__) - - -_CITATION = """\ -@article{Jaume2019FUNSDAD, - title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, - author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, - journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, - year={2019}, - volume={2}, - pages={1-6} -} -""" - -_DESCRIPTION = """\ -https://guillaumejaume.github.io/FUNSD/ -""" - - -class FunsdConfig(datasets.BuilderConfig): - """BuilderConfig for FUNSD""" - - def __init__(self, **kwargs): - """BuilderConfig for FUNSD. - - Args: - **kwargs: keyword arguments forwarded to super. 
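`normalize_bbox` above rescales pixel-space boxes to the 0-1000 coordinate grid expected by LayoutLM, using the `(width, height)` tuple returned by `load_image`. A small hypothetical example:

```python
# Assumes normalize_bbox defined above is in scope.
box = [76, 152, 304, 190]         # hypothetical pixel box (x0, y0, x1, y1)
size = (762, 1000)                # (width, height) as returned by load_image
print(normalize_bbox(box, size))  # [99, 152, 398, 190]
```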
- """ - super(FunsdConfig, self).__init__(**kwargs) - - -class Funsd(datasets.GeneratorBasedBuilder): - """Conll2003 dataset.""" - - BUILDER_CONFIGS = [ - FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "tokens": datasets.Sequence(datasets.Value("string")), - "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] - ) - ), - "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), - } - ), - supervised_keys=None, - homepage="https://guillaumejaume.github.io/FUNSD/", - citation=_CITATION, - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} - ), - ] - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - ann_dir = os.path.join(filepath, "annotations") - img_dir = os.path.join(filepath, "images") - for guid, file in enumerate(sorted(os.listdir(ann_dir))): - tokens = [] - bboxes = [] - ner_tags = [] - - file_path = os.path.join(ann_dir, file) - with open(file_path, "r", encoding="utf8") as f: - data = json.load(f) - image_path = os.path.join(img_dir, file) - image_path = image_path.replace("json", "png") - image, size = load_image(image_path) - for item in data["form"]: - words, label = item["words"], item["label"] - words = [w for w in words if w["text"].strip() != ""] - if len(words) == 0: - continue - if label == "other": - for w in words: - tokens.append(w["text"]) - ner_tags.append("O") - bboxes.append(normalize_bbox(w["box"], size)) - else: - tokens.append(words[0]["text"]) - ner_tags.append("B-" + label.upper()) - bboxes.append(normalize_bbox(words[0]["box"], size)) - for w in words[1:]: - tokens.append(w["text"]) - ner_tags.append("I-" + label.upper()) - bboxes.append(normalize_bbox(w["box"], size)) - +# coding=utf-8 + +import json +import os + +import datasets + +from utils import load_image, normalize_bbox + + +logger = datasets.logging.get_logger(__name__) + + +_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" + +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. 
+ """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """Conll2003 dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "tokens": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + tokens = [] + bboxes = [] + ner_tags = [] + + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + words, label = item["words"], item["label"] + words = [w for w in words if w["text"].strip() != ""] + if len(words) == 0: + continue + if label == "other": + for w in words: + tokens.append(w["text"]) + ner_tags.append("O") + bboxes.append(normalize_bbox(w["box"], size)) + else: + tokens.append(words[0]["text"]) + ner_tags.append("B-" + label.upper()) + bboxes.append(normalize_bbox(words[0]["box"], size)) + for w in words[1:]: + tokens.append(w["text"]) + ner_tags.append("I-" + label.upper()) + bboxes.append(normalize_bbox(w["box"], size)) + yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, "image": image} \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/main.py index ba61977b2e5..3480ff27f72 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/main.py @@ -1,482 +1,482 @@ -#!/usr/bin/env python -# coding=utf-8 - -import logging -import os -import sys -from dataclasses import dataclass, field -from typing import Optional - -import numpy as np -from datasets import 
ClassLabel, load_dataset, load_metric - -import funsd -import transformers -from utils import DataCollatorForKeyValueExtraction, DataTrainingArguments -from trainer import FunsdTrainer as Trainer -from transformers import ( - AutoConfig, - AutoModelForTokenClassification, - AutoTokenizer, - HfArgumentParser, - PreTrainedTokenizerFast, - TrainingArguments, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version -import onnxruntime -import onnx -from neural_compressor.data import DataLoader - -import pyarrow_hotfix; pyarrow_hotfix.uninstall() - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.5.0") - -logger = logging.getLogger(__name__) - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." 
- }, - ) - input_model: str = field( - default=None, - metadata={"help": "Path to onnx model"} - ) - tune: bool = field( - default=False, - metadata={"help": ("INC tune")}, - ) - benchmark: bool = field( - default=False, - metadata={"help": ("INC benchmark")}, - ) - mode: str = field( - default='performance', - metadata={"help": ("INC benchmark mode")}, - ) - num_heads: int = field( - default=12, - metadata={"help": ("onnx model optimize num_heads")}, - ) - hidden_size: int = field( - default=768, - metadata={"help": ("onnx model optimize hidden_size")}, - ) - save_path: str = field( - default=None, - metadata={"help": ("onnx int8 model path")}, - ) - batch_size: int = field( - default=1, - metadata={"help": ("batch size for benchmark")}, - ) - quant_format: str = field( - default="QOperator", - metadata={"help": ("quant format")}, - ) - - -class IncDataset(): - def __init__(self, - dataset, - model, - label_names=None,): - self.dataset = dataset - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self._process_dataset() - - def _process_dataset(self): - self.label = [] - self.onnx_inputs = [] - for inputs in self.dataset: - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - self.label.append(labels) - self.onnx_inputs.append([np.array(inputs[key]) for key in self.onnx_input_names if key in inputs]) - - def __getitem__(self, index): - return tuple(self.onnx_inputs[index]), self.label[index] - - def __len__(self): - assert len(self.label) == len(self.onnx_inputs) - return len(self.onnx_inputs) - -def main(): - # See all possible arguments in layoutlmft/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - elif last_checkpoint is not None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." 
- ) - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - # Set the verbosity to info of the Transformers logger (on main process only): - if is_main_process(training_args.local_rank): - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - logger.info(f"Training/evaluation parameters {training_args}") - - # Set seed before initializing model. - set_seed(training_args.seed) - - datasets = load_dataset(os.path.abspath(funsd.__file__)) - if training_args.do_train: - column_names = datasets["train"].column_names - features = datasets["train"].features - else: - column_names = datasets["test"].column_names - features = datasets["test"].features - text_column_name = "tokens" if "tokens" in column_names else column_names[0] - label_column_name = ( - f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] - ) - - remove_columns = column_names - - # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the - # unique labels. - def get_label_list(labels): - unique_labels = set() - for label in labels: - unique_labels = unique_labels | set(label) - label_list = list(unique_labels) - label_list.sort() - return label_list - - if isinstance(features[label_column_name].feature, ClassLabel): - label_list = features[label_column_name].feature.names - # No need to convert the labels since they are already ints. - label_to_id = {i: i for i in range(len(label_list))} - else: - label_list = get_label_list(datasets["train"][label_column_name]) - label_to_id = {l: i for i, l in enumerate(label_list)} - num_labels = len(label_list) - - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. - config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - num_labels=num_labels, - finetuning_task=data_args.task_name, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - use_fast=True, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - model = AutoModelForTokenClassification.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - use_safetensors = False, - ) - - # Tokenizer check: this script requires a fast tokenizer. 
- if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " - "requirement" - ) - - # Preprocessing the dataset - # Padding strategy - padding = "max_length" if data_args.pad_to_max_length else False - - # Tokenize all texts and align the labels with them. - def tokenize_and_align_labels(examples): - tokenized_inputs = tokenizer( - examples[text_column_name], - padding=padding, - truncation=True, - return_overflowing_tokens=True, - # We use this argument because the texts in our dataset are lists of words (with a label for each word). - is_split_into_words=True, - ) - - labels = [] - bboxes = [] - images = [] - for batch_index in range(len(tokenized_inputs["input_ids"])): - word_ids = tokenized_inputs.word_ids(batch_index=batch_index) - org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] - - label = examples[label_column_name][org_batch_index] - bbox = examples["bboxes"][org_batch_index] - image = examples["image"][org_batch_index] - previous_word_idx = None - label_ids = [] - bbox_inputs = [] - for word_idx in word_ids: - # Special tokens have a word id that is None. We set the label to -100 so they are automatically - # ignored in the loss function. - if word_idx is None: - label_ids.append(-100) - bbox_inputs.append([0, 0, 0, 0]) - # We set the label for the first token of each word. - elif word_idx != previous_word_idx: - label_ids.append(label_to_id[label[word_idx]]) - bbox_inputs.append(bbox[word_idx]) - # For the other tokens in a word, we set the label to either the current label or -100, depending on - # the label_all_tokens flag. 
- else: - label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) - bbox_inputs.append(bbox[word_idx]) - previous_word_idx = word_idx - labels.append(label_ids) - bboxes.append(bbox_inputs) - images.append(image) - tokenized_inputs["labels"] = labels - tokenized_inputs["bbox"] = bboxes - tokenized_inputs["image"] = images - return tokenized_inputs - - if training_args.do_train: - if "train" not in datasets: - raise ValueError("--do_train requires a train dataset") - train_dataset = datasets["train"] - if data_args.max_train_samples is not None: - train_dataset = train_dataset.select(range(data_args.max_train_samples)) - train_dataset = train_dataset.map( - tokenize_and_align_labels, - batched=True, - remove_columns=remove_columns, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) - - if training_args.do_eval: - if "test" not in datasets: - raise ValueError("--do_eval requires a test dataset") - test_dataset = datasets["test"] - if data_args.max_test_samples is not None: - test_dataset = test_dataset.select(range(data_args.max_test_samples)) - test_dataset = test_dataset.map( - tokenize_and_align_labels, - batched=True, - remove_columns=remove_columns, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) - - # Data collator - data_collator = DataCollatorForKeyValueExtraction( - tokenizer, - pad_to_multiple_of=8 if training_args.fp16 else None, - padding=padding, - max_length=512, - ) - - # Metrics - metric = load_metric("seqeval") - - def compute_metrics(p): - predictions, labels = p - predictions = np.argmax(predictions, axis=2) - - # Remove ignored index (special tokens) - true_predictions = [ - [label_list[p] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - true_labels = [ - [label_list[l] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - - results = metric.compute(predictions=true_predictions, references=true_labels) - if data_args.return_entity_level_metrics: - # Unpack nested dictionaries - final_results = {} - for key, value in results.items(): - if isinstance(value, dict): - for n, v in value.items(): - final_results[f"{key}_{n}"] = v - else: - final_results[key] = value - return final_results - else: - return { - "precision": results["overall_precision"], - "recall": results["overall_recall"], - "f1": results["overall_f1"], - "accuracy": results["overall_accuracy"], - } - - # Initialize our Trainer - trainer = Trainer( - model=model, - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=test_dataset if training_args.do_eval else None, - tokenizer=tokenizer, - data_collator=data_collator, - compute_metrics=compute_metrics, - ) - - # Training - if training_args.do_train: - checkpoint = last_checkpoint if last_checkpoint else None - train_result = trainer.train(resume_from_checkpoint=checkpoint) - metrics = train_result.metrics - trainer.save_model() # Saves the tokenizer too for easy upload - - max_train_samples = ( - data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) - ) - metrics["train_samples"] = min(max_train_samples, len(train_dataset)) - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - from model import ORTModel - def 
eval_func(model): - logger.info("*** Evaluate ***") - ort_model = ORTModel( - model, - compute_metrics=compute_metrics, - ) - outputs = ort_model.evaluation_loop(test_dataset) - return outputs.metrics['f1'] - - if model_args.tune: - # optimize model - from onnxruntime.transformers import optimizer - from onnxruntime.transformers.fusion_options import FusionOptions - opt_options = FusionOptions('bert') - - model_optimizer = optimizer.optimize_model( - model_args.input_model, - 'bert', - num_heads=12, - hidden_size=768, - optimization_options=opt_options) - onnx_model = model_optimizer.model - - # check the optimized model is valid - try: - onnxruntime.InferenceSession(onnx_model.SerializeToString(), providers=onnxruntime.get_available_providers()) - except Exception as e: - logger.warning("Optimized model is invalid: {}. ".format(e)) - logger.warning("Model optimizer will be skipped. " \ - "Try to upgrade onnxruntime to avoid this error") - onnx_model = onnx.load(model_args.input_model) - - from neural_compressor import quantization, PostTrainingQuantConfig - from neural_compressor.utils.constant import FP32 - - calib_dataset = IncDataset(test_dataset, onnx_model) - fp32_op_names = ['Attention_(0|3|4)', - '/layoutlm/embeddings/Add(_(2|4|6)|)', - '.*?_position_embeddings.*?'] - config = PostTrainingQuantConfig(approach='static', - quant_format=model_args.quant_format, - op_name_dict={op_name:FP32 for op_name in fp32_op_names}) - q_model = quantization.fit(onnx_model, - config, - eval_func=eval_func, - calib_dataloader=DataLoader(framework='onnxruntime', - dataset=calib_dataset, - batch_size=1)) - q_model.save(model_args.save_path) - if model_args.benchmark: - onnx_model = onnx.load(model_args.input_model) - if model_args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - b_dataset = IncDataset(test_dataset, onnx_model) - conf = BenchmarkConfig(iteration=100, - cores_per_instance=28, - num_of_instance=1) - b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) - fit(onnx_model, conf, b_dataloader=b_dataloader) - elif model_args.mode == 'accuracy': - eval_f1 = eval_func(onnx_model) - print("Batch size = %d" % model_args.batch_size) - print("Accuracy: %.5f" % eval_f1) - -if __name__ == "__main__": - main() +#!/usr/bin/env python +# coding=utf-8 + +import logging +import os +import sys +from dataclasses import dataclass, field +from typing import Optional + +import numpy as np +from datasets import ClassLabel, load_dataset, load_metric + +import funsd +import transformers +from utils import DataCollatorForKeyValueExtraction, DataTrainingArguments +from trainer import FunsdTrainer as Trainer +from transformers import ( + AutoConfig, + AutoModelForTokenClassification, + AutoTokenizer, + HfArgumentParser, + PreTrainedTokenizerFast, + TrainingArguments, + set_seed, +) +from transformers.trainer_utils import get_last_checkpoint, is_main_process +from transformers.utils import check_min_version +import onnxruntime +import onnx +from neural_compressor.data import DataLoader + +import pyarrow_hotfix; pyarrow_hotfix.uninstall() + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.5.0") + +logger = logging.getLogger(__name__) + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
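Distilled from the tuning branch of the script above: static post-training quantization is configured so that selected attention and embedding ops stay in FP32 via `op_name_dict`, and the resulting config is passed to `quantization.fit` together with `eval_func` and a calibration dataloader. A minimal sketch of just the configuration step, assuming Intel Neural Compressor 2.x is installed:

```python
from neural_compressor import PostTrainingQuantConfig
from neural_compressor.utils.constant import FP32

# Ops matched by these regex patterns are kept in FP32, as in the script above.
fp32_op_names = ['Attention_(0|3|4)',
                 '/layoutlm/embeddings/Add(_(2|4|6)|)',
                 '.*?_position_embeddings.*?']
config = PostTrainingQuantConfig(approach='static',
                                 quant_format='QOperator',
                                 op_name_dict={name: FP32 for name in fp32_op_names})
# config is then passed to quantization.fit(...) with eval_func and calib_dataloader.
```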
+ """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + input_model: str = field( + default=None, + metadata={"help": "Path to onnx model"} + ) + tune: bool = field( + default=False, + metadata={"help": ("INC tune")}, + ) + benchmark: bool = field( + default=False, + metadata={"help": ("INC benchmark")}, + ) + mode: str = field( + default='performance', + metadata={"help": ("INC benchmark mode")}, + ) + num_heads: int = field( + default=12, + metadata={"help": ("onnx model optimize num_heads")}, + ) + hidden_size: int = field( + default=768, + metadata={"help": ("onnx model optimize hidden_size")}, + ) + save_path: str = field( + default=None, + metadata={"help": ("onnx int8 model path")}, + ) + batch_size: int = field( + default=1, + metadata={"help": ("batch size for benchmark")}, + ) + quant_format: str = field( + default="QOperator", + metadata={"help": ("quant format")}, + ) + + +class IncDataset(): + def __init__(self, + dataset, + model, + label_names=None,): + self.dataset = dataset + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + self._process_dataset() + + def _process_dataset(self): + self.label = [] + self.onnx_inputs = [] + for inputs in self.dataset: + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + self.label.append(labels) + self.onnx_inputs.append([np.array(inputs[key]) for key in self.onnx_input_names if key in inputs]) + + def __getitem__(self, index): + return tuple(self.onnx_inputs[index]), self.label[index] + + def __len__(self): + assert len(self.label) == len(self.onnx_inputs) + return len(self.onnx_inputs) + +def main(): + # See all possible arguments in layoutlmft/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
+ model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Detecting last checkpoint. + last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + elif last_checkpoint is not None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." + ) + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + # Set the verbosity to info of the Transformers logger (on main process only): + if is_main_process(training_args.local_rank): + transformers.utils.logging.set_verbosity_info() + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + logger.info(f"Training/evaluation parameters {training_args}") + + # Set seed before initializing model. + set_seed(training_args.seed) + + datasets = load_dataset(os.path.abspath(funsd.__file__)) + if training_args.do_train: + column_names = datasets["train"].column_names + features = datasets["train"].features + else: + column_names = datasets["test"].column_names + features = datasets["test"].features + text_column_name = "tokens" if "tokens" in column_names else column_names[0] + label_column_name = ( + f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] + ) + + remove_columns = column_names + + # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the + # unique labels. + def get_label_list(labels): + unique_labels = set() + for label in labels: + unique_labels = unique_labels | set(label) + label_list = list(unique_labels) + label_list.sort() + return label_list + + if isinstance(features[label_column_name].feature, ClassLabel): + label_list = features[label_column_name].feature.names + # No need to convert the labels since they are already ints. + label_to_id = {i: i for i in range(len(label_list))} + else: + label_list = get_label_list(datasets["train"][label_column_name]) + label_to_id = {l: i for i, l in enumerate(label_list)} + num_labels = len(label_list) + + # Load pretrained model and tokenizer + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. 
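When the label feature is not a `ClassLabel`, the script builds the label vocabulary itself and maps each string label to an integer id. A standalone sketch of that fallback with made-up labels:

```python
# Collect the unique string labels across examples and map them to ids,
# mirroring get_label_list / label_to_id above.
def get_label_list(labels):
    unique_labels = set()
    for label in labels:
        unique_labels = unique_labels | set(label)
    return sorted(unique_labels)

labels = [["O", "B-HEADER"], ["O", "B-ANSWER", "I-ANSWER"]]
label_list = get_label_list(labels)
label_to_id = {l: i for i, l in enumerate(label_list)}
print(label_to_id)  # {'B-ANSWER': 0, 'B-HEADER': 1, 'I-ANSWER': 2, 'O': 3}
```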
+ config = AutoConfig.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + num_labels=num_labels, + finetuning_task=data_args.task_name, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + use_fast=True, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + model = AutoModelForTokenClassification.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + use_safetensors = False, + ) + + # Tokenizer check: this script requires a fast tokenizer. + if not isinstance(tokenizer, PreTrainedTokenizerFast): + raise ValueError( + "This example script only works for models that have a fast tokenizer. Checkout the big table of models " + "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " + "requirement" + ) + + # Preprocessing the dataset + # Padding strategy + padding = "max_length" if data_args.pad_to_max_length else False + + # Tokenize all texts and align the labels with them. + def tokenize_and_align_labels(examples): + tokenized_inputs = tokenizer( + examples[text_column_name], + padding=padding, + truncation=True, + return_overflowing_tokens=True, + # We use this argument because the texts in our dataset are lists of words (with a label for each word). + is_split_into_words=True, + ) + + labels = [] + bboxes = [] + images = [] + for batch_index in range(len(tokenized_inputs["input_ids"])): + word_ids = tokenized_inputs.word_ids(batch_index=batch_index) + org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] + + label = examples[label_column_name][org_batch_index] + bbox = examples["bboxes"][org_batch_index] + image = examples["image"][org_batch_index] + previous_word_idx = None + label_ids = [] + bbox_inputs = [] + for word_idx in word_ids: + # Special tokens have a word id that is None. We set the label to -100 so they are automatically + # ignored in the loss function. + if word_idx is None: + label_ids.append(-100) + bbox_inputs.append([0, 0, 0, 0]) + # We set the label for the first token of each word. + elif word_idx != previous_word_idx: + label_ids.append(label_to_id[label[word_idx]]) + bbox_inputs.append(bbox[word_idx]) + # For the other tokens in a word, we set the label to either the current label or -100, depending on + # the label_all_tokens flag. 
+ else: + label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) + bbox_inputs.append(bbox[word_idx]) + previous_word_idx = word_idx + labels.append(label_ids) + bboxes.append(bbox_inputs) + images.append(image) + tokenized_inputs["labels"] = labels + tokenized_inputs["bbox"] = bboxes + tokenized_inputs["image"] = images + return tokenized_inputs + + if training_args.do_train: + if "train" not in datasets: + raise ValueError("--do_train requires a train dataset") + train_dataset = datasets["train"] + if data_args.max_train_samples is not None: + train_dataset = train_dataset.select(range(data_args.max_train_samples)) + train_dataset = train_dataset.map( + tokenize_and_align_labels, + batched=True, + remove_columns=remove_columns, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) + + if training_args.do_eval: + if "test" not in datasets: + raise ValueError("--do_eval requires a test dataset") + test_dataset = datasets["test"] + if data_args.max_test_samples is not None: + test_dataset = test_dataset.select(range(data_args.max_test_samples)) + test_dataset = test_dataset.map( + tokenize_and_align_labels, + batched=True, + remove_columns=remove_columns, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) + + # Data collator + data_collator = DataCollatorForKeyValueExtraction( + tokenizer, + pad_to_multiple_of=8 if training_args.fp16 else None, + padding=padding, + max_length=512, + ) + + # Metrics + metric = load_metric("seqeval") + + def compute_metrics(p): + predictions, labels = p + predictions = np.argmax(predictions, axis=2) + + # Remove ignored index (special tokens) + true_predictions = [ + [label_list[p] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + true_labels = [ + [label_list[l] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + + results = metric.compute(predictions=true_predictions, references=true_labels) + if data_args.return_entity_level_metrics: + # Unpack nested dictionaries + final_results = {} + for key, value in results.items(): + if isinstance(value, dict): + for n, v in value.items(): + final_results[f"{key}_{n}"] = v + else: + final_results[key] = value + return final_results + else: + return { + "precision": results["overall_precision"], + "recall": results["overall_recall"], + "f1": results["overall_f1"], + "accuracy": results["overall_accuracy"], + } + + # Initialize our Trainer + trainer = Trainer( + model=model, + args=training_args, + train_dataset=train_dataset if training_args.do_train else None, + eval_dataset=test_dataset if training_args.do_eval else None, + tokenizer=tokenizer, + data_collator=data_collator, + compute_metrics=compute_metrics, + ) + + # Training + if training_args.do_train: + checkpoint = last_checkpoint if last_checkpoint else None + train_result = trainer.train(resume_from_checkpoint=checkpoint) + metrics = train_result.metrics + trainer.save_model() # Saves the tokenizer too for easy upload + + max_train_samples = ( + data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) + ) + metrics["train_samples"] = min(max_train_samples, len(train_dataset)) + + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + # Evaluation + if training_args.do_eval: + from model import ORTModel + def 
eval_func(model): + logger.info("*** Evaluate ***") + ort_model = ORTModel( + model, + compute_metrics=compute_metrics, + ) + outputs = ort_model.evaluation_loop(test_dataset) + return outputs.metrics['f1'] + + if model_args.tune: + # optimize model + from onnxruntime.transformers import optimizer + from onnxruntime.transformers.fusion_options import FusionOptions + opt_options = FusionOptions('bert') + + model_optimizer = optimizer.optimize_model( + model_args.input_model, + 'bert', + num_heads=12, + hidden_size=768, + optimization_options=opt_options) + onnx_model = model_optimizer.model + + # check the optimized model is valid + try: + onnxruntime.InferenceSession(onnx_model.SerializeToString(), providers=onnxruntime.get_available_providers()) + except Exception as e: + logger.warning("Optimized model is invalid: {}. ".format(e)) + logger.warning("Model optimizer will be skipped. " \ + "Try to upgrade onnxruntime to avoid this error") + onnx_model = onnx.load(model_args.input_model) + + from neural_compressor import quantization, PostTrainingQuantConfig + from neural_compressor.utils.constant import FP32 + + calib_dataset = IncDataset(test_dataset, onnx_model) + fp32_op_names = ['Attention_(0|3|4)', + '/layoutlm/embeddings/Add(_(2|4|6)|)', + '.*?_position_embeddings.*?'] + config = PostTrainingQuantConfig(approach='static', + quant_format=model_args.quant_format, + op_name_dict={op_name:FP32 for op_name in fp32_op_names}) + q_model = quantization.fit(onnx_model, + config, + eval_func=eval_func, + calib_dataloader=DataLoader(framework='onnxruntime', + dataset=calib_dataset, + batch_size=1)) + q_model.save(model_args.save_path) + if model_args.benchmark: + onnx_model = onnx.load(model_args.input_model) + if model_args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + b_dataset = IncDataset(test_dataset, onnx_model) + conf = BenchmarkConfig(iteration=100, + cores_per_instance=28, + num_of_instance=1) + b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) + fit(onnx_model, conf, b_dataloader=b_dataloader) + elif model_args.mode == 'accuracy': + eval_f1 = eval_func(onnx_model) + print("Batch size = %d" % model_args.batch_size) + print("Accuracy: %.5f" % eval_f1) + +if __name__ == "__main__": + main() diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/model.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/model.py index 0a6c03e7c21..9e94ca0f56f 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/model.py @@ -1,80 +1,80 @@ -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -from typing import Callable, Dict, List, Optional, Union - -import numpy as np -from datasets import Dataset -from transformers import EvalPrediction -from transformers.trainer_pt_utils import nested_concat -from transformers.trainer_utils import EvalLoopOutput -import onnxruntime - -logger = logging.getLogger(__name__) - -class ORTModel: - def __init__( - self, - model: Union[str, os.PathLike], - compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, - label_names: Optional[List[str]] = None, - ): - """ - Args: - model_path (`Union[str, os.PathLike]`): - The path to the model ONNX Intermediate Representation (IR). - compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): - The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and - return a dictionary string to metric values. - label_names (`List[str]`, `optional`): - The list of keys in your dictionary of inputs that correspond to the labels. - """ - self.compute_metrics = compute_metrics - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - - def evaluation_loop(self, dataset: Dataset): - """ - Run evaluation and returns metrics and predictions. - - Args: - dataset (`datasets.Dataset`): - Dataset to use for the evaluation step. - """ - logger.info("***** Running evaluation *****") - all_preds = None - all_labels = None - for step, inputs in enumerate(dataset): - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - onnx_inputs = {key: np.array([inputs[key]]) for key in self.onnx_input_names if key in inputs} - preds = self.session.run(None, onnx_inputs) - if len(preds) == 1: - preds = preds[0] - all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) - all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) - if self.compute_metrics is not None and all_preds is not None and all_labels is not None: - metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) - else: - metrics = {} - return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset)) +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +from typing import Callable, Dict, List, Optional, Union + +import numpy as np +from datasets import Dataset +from transformers import EvalPrediction +from transformers.trainer_pt_utils import nested_concat +from transformers.trainer_utils import EvalLoopOutput +import onnxruntime + +logger = logging.getLogger(__name__) + +class ORTModel: + def __init__( + self, + model: Union[str, os.PathLike], + compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, + label_names: Optional[List[str]] = None, + ): + """ + Args: + model_path (`Union[str, os.PathLike]`): + The path to the model ONNX Intermediate Representation (IR). + compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): + The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and + return a dictionary string to metric values. + label_names (`List[str]`, `optional`): + The list of keys in your dictionary of inputs that correspond to the labels. + """ + self.compute_metrics = compute_metrics + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + + def evaluation_loop(self, dataset: Dataset): + """ + Run evaluation and returns metrics and predictions. + + Args: + dataset (`datasets.Dataset`): + Dataset to use for the evaluation step. + """ + logger.info("***** Running evaluation *****") + all_preds = None + all_labels = None + for step, inputs in enumerate(dataset): + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + onnx_inputs = {key: np.array([inputs[key]]) for key in self.onnx_input_names if key in inputs} + preds = self.session.run(None, onnx_inputs) + if len(preds) == 1: + preds = preds[0] + all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) + all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) + if self.compute_metrics is not None and all_preds is not None and all_labels is not None: + metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) + else: + metrics = {} + return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset)) diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/requirements.txt similarity index 92% rename from 
examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/requirements.txt index 0510bcb4b39..54266b7289d 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/requirements.txt @@ -1,20 +1,20 @@ -accelerate -datasets -transformers -huggingface-hub -seqeval -tensorboard -sentencepiece -timm -fvcore -Pillow -einops -textdistance -shapely -protobuf -torch -torchvision -setuptools -onnx -onnxruntime +accelerate +datasets +transformers +huggingface-hub +seqeval +tensorboard +sentencepiece +timm +fvcore +Pillow +einops +textdistance +shapely +protobuf +torch +torchvision +setuptools +onnx +onnxruntime onnxruntime-extensions; python_version < '3.11' \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/trainer.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/trainer.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/trainer.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/trainer.py index 0a2f88390f8..7379ae60f55 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_dynamic/trainer.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/trainer.py @@ -1,21 +1,21 @@ -from typing import Any, Dict, Union - -import torch - -from transformers import Trainer - - -class FunsdTrainer(Trainer): - def _prepare_inputs(self, inputs: Dict[str, Union[torch.Tensor, Any]]) -> Dict[str, Union[torch.Tensor, Any]]: - """ - Prepare :obj:`inputs` before feeding them to the model, converting them to tensors if they are not already and - handling potential state. 
- """ - for k, v in inputs.items(): - if hasattr(v, "to") and hasattr(v, "device"): - inputs[k] = v.to(self.args.device) - - if self.args.past_index >= 0 and self._past is not None: - inputs["mems"] = self._past - +from typing import Any, Dict, Union + +import torch + +from transformers import Trainer + + +class FunsdTrainer(Trainer): + def _prepare_inputs(self, inputs: Dict[str, Union[torch.Tensor, Any]]) -> Dict[str, Union[torch.Tensor, Any]]: + """ + Prepare :obj:`inputs` before feeding them to the model, converting them to tensors if they are not already and + handling potential state. + """ + for k, v in inputs.items(): + if hasattr(v, "to") and hasattr(v, "device"): + inputs[k] = v.to(self.args.device) + + if self.args.past_index >= 0 and self._past is not None: + inputs["mems"] = self._past + return inputs \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/utils.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/utils.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/utils.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/utils.py index c77f3e46317..f7ff0396188 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/utils.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlm/quantization/ptq_static/utils.py @@ -1,506 +1,506 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -from __future__ import division -from typing import Any, Dict, List, Optional, Tuple -import torch -from torch import device -from torch.nn import functional as F -from dataclasses import dataclass, field -from typing import Optional, Union -from transformers import PreTrainedTokenizerBase -from transformers.file_utils import PaddingStrategy -import numpy as np -from PIL import Image - -from fvcore.transforms.transform import TransformList, Transform - -from iopath.common.file_io import PathManager as PathManagerBase -PathManager = PathManagerBase() - -# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 -_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] - -# https://www.exiv2.org/tags.html -_EXIF_ORIENT = 274 # exif 'Orientation' tag - - -@torch.jit.script_if_tracing -def move_device_like(src: torch.Tensor, dst: torch.Tensor) -> torch.Tensor: - """ - Tracing friendly way to cast tensor to another tensor's device. Device will be treated - as constant during tracing, scripting the casting process as whole can workaround this issue. - """ - return src.to(dst.device) - -def shapes_to_tensor(x: List[int], device: Optional[torch.device] = None) -> torch.Tensor: - """ - Turn a list of integer scalars or integer Tensor scalars into a vector, - in a way that's both traceable and scriptable. - - In tracing, `x` should be a list of scalar Tensor, so the output can trace to the inputs. - In scripting or eager, `x` should be a list of int. - """ - if torch.jit.is_scripting(): - return torch.as_tensor(x, device=device) - if torch.jit.is_tracing(): - assert all( - [isinstance(t, torch.Tensor) for t in x] - ), "Shape should be tensor during tracing!" 
- # as_tensor should not be used in tracing because it records a constant - ret = torch.stack(x) - if ret.device != device: # avoid recording a hard-coded device if not necessary - ret = ret.to(device=device) - return ret - return torch.as_tensor(x, device=device) - -class ImageList(object): - """ - Structure that holds a list of images (of possibly - varying sizes) as a single tensor. - This works by padding the images to the same size. - The original sizes of each image is stored in `image_sizes`. - - Attributes: - image_sizes (list[tuple[int, int]]): each tuple is (h, w). - During tracing, it becomes list[Tensor] instead. - """ - - def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): - """ - Arguments: - tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 - image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can - be smaller than (H, W) due to padding. - """ - self.tensor = tensor - self.image_sizes = image_sizes - - def __len__(self) -> int: - return len(self.image_sizes) - - def __getitem__(self, idx) -> torch.Tensor: - """ - Access the individual image in its original size. - - Args: - idx: int or slice - - Returns: - Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 - """ - size = self.image_sizes[idx] - return self.tensor[idx, ..., : size[0], : size[1]] - - @torch.jit.unused - def to(self, *args: Any, **kwargs: Any) -> "ImageList": - cast_tensor = self.tensor.to(*args, **kwargs) - return ImageList(cast_tensor, self.image_sizes) - - @property - def device(self) -> device: - return self.tensor.device - - @staticmethod - def from_tensors( - tensors: List[torch.Tensor], - size_divisibility: int = 0, - pad_value: float = 0.0, - padding_constraints: Optional[Dict[str, int]] = None, - ) -> "ImageList": - """ - Args: - tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or - (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded - to the same shape with `pad_value`. - size_divisibility (int): If `size_divisibility > 0`, add padding to ensure - the common height and width is divisible by `size_divisibility`. - This depends on the model and many models need a divisibility of 32. - pad_value (float): value to pad. - padding_constraints (optional[Dict]): If given, it would follow the format as - {"size_divisibility": int, "square_size": int}, where `size_divisibility` will - overwrite the above one if presented and `square_size` indicates the - square padding size if `square_size` > 0. - Returns: - an `ImageList`. - """ - assert len(tensors) > 0 - assert isinstance(tensors, (tuple, list)) - for t in tensors: - assert isinstance(t, torch.Tensor), type(t) - assert t.shape[:-2] == tensors[0].shape[:-2], t.shape - - image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] - image_sizes_tensor = [shapes_to_tensor(x) for x in image_sizes] - max_size = torch.stack(image_sizes_tensor).max(0).values - - if padding_constraints is not None: - square_size = padding_constraints.get("square_size", 0) - if square_size > 0: - # pad to square. - max_size[0] = max_size[1] = square_size - if "size_divisibility" in padding_constraints: - size_divisibility = padding_constraints["size_divisibility"] - if size_divisibility > 1: - stride = size_divisibility - # the last two dims are H,W, both subject to divisibility requirement - max_size = (max_size + (stride - 1)).div(stride, rounding_mode="floor") * stride - - # handle weirdness of scripting and tracing ... 
- if torch.jit.is_scripting(): - max_size: List[int] = max_size.to(dtype=torch.long).tolist() - else: - if torch.jit.is_tracing(): - image_sizes = image_sizes_tensor - - if len(tensors) == 1: - # This seems slightly (2%) faster. - # TODO: check whether it's faster for multiple images as well - image_size = image_sizes[0] - padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] - batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) - else: - # max_size can be a tensor in tracing mode, therefore convert to list - batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) - device = ( - None if torch.jit.is_scripting() else ("cpu" if torch.jit.is_tracing() else None) - ) - batched_imgs = tensors[0].new_full(batch_shape, pad_value, device=device) - batched_imgs = move_device_like(batched_imgs, tensors[0]) - for i, img in enumerate(tensors): - # Use `batched_imgs` directly instead of `img, pad_img = zip(tensors, batched_imgs)` - # Tracing mode cannot capture `copy_()` of temporary locals - batched_imgs[i, ..., : img.shape[-2], : img.shape[-1]].copy_(img) - - return ImageList(batched_imgs.contiguous(), image_sizes) - -def convert_PIL_to_numpy(image, format): - """ - Convert PIL image to numpy array of target format. - - Args: - image (PIL.Image): a PIL image - format (str): the format of output image - - Returns: - (np.ndarray): also see `read_image` - """ - if format is not None: - # PIL only supports RGB, so convert to RGB and flip channels over below - conversion_format = format - if format in ["BGR", "YUV-BT.601"]: - conversion_format = "RGB" - image = image.convert(conversion_format) - image = np.asarray(image) - # PIL squeezes out the channel dimension for "L", so make it HWC - if format == "L": - image = np.expand_dims(image, -1) - - # handle formats not supported by PIL - elif format == "BGR": - # flip channels if needed - image = image[:, :, ::-1] - elif format == "YUV-BT.601": - image = image / 255.0 - image = np.dot(image, np.array(_M_RGB2YUV).T) - - return image - -def _apply_exif_orientation(image): - """ - Applies the exif orientation correctly. - - This code exists per the bug: - https://github.com/python-pillow/Pillow/issues/3973 - with the function `ImageOps.exif_transpose`. The Pillow source raises errors with - various methods, especially `tobytes` - - Function based on: - https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 - https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 - - Args: - image (PIL.Image): a PIL image - - Returns: - (PIL.Image): the PIL image with exif orientation applied, if applicable - """ - if not hasattr(image, "getexif"): - return image - - try: - exif = image.getexif() - except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 - exif = None - - if exif is None: - return image - - orientation = exif.get(_EXIF_ORIENT) - - method = { - 2: Image.FLIP_LEFT_RIGHT, - 3: Image.ROTATE_180, - 4: Image.FLIP_TOP_BOTTOM, - 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, - 7: Image.TRANSVERSE, - 8: Image.ROTATE_90, - }.get(orientation) - - if method is not None: - return image.transpose(method) - return image - -def read_image(file_name, format=None): - """ - Read an image into the given format. - Will apply rotation and flipping if the image has such exif information. - - Args: - file_name (str): image file path - format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". 
- - Returns: - image (np.ndarray): - an HWC image in the given format, which is 0-255, uint8 for - supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. - """ - with PathManager.open(file_name, "rb") as f: - image = Image.open(f) - - # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 - image = _apply_exif_orientation(image) - return convert_PIL_to_numpy(image, format) - -class ResizeTransform(Transform): - """ - Resize the image to a target size. - """ - - def __init__(self, h, w, new_h, new_w, interp=None): - """ - Args: - h, w (int): original image size - new_h, new_w (int): new image size - interp: PIL interpolation methods, defaults to bilinear. - """ - # TODO decide on PIL vs opencv - super().__init__() - if interp is None: - interp = Image.BILINEAR - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - assert img.shape[:2] == (self.h, self.w) - assert len(img.shape) <= 4 - interp_method = interp if interp is not None else self.interp - - if img.dtype == np.uint8: - if len(img.shape) > 2 and img.shape[2] == 1: - pil_image = Image.fromarray(img[:, :, 0], mode="L") - else: - pil_image = Image.fromarray(img) - pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) - ret = np.asarray(pil_image) - if len(img.shape) > 2 and img.shape[2] == 1: - ret = np.expand_dims(ret, -1) - else: - # PIL only supports uint8 - if any(x < 0 for x in img.strides): - img = np.ascontiguousarray(img) - img = torch.from_numpy(img) - shape = list(img.shape) - shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] - img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw - _PIL_RESIZE_TO_INTERPOLATE_MODE = { - Image.NEAREST: "nearest", - Image.BILINEAR: "bilinear", - Image.BICUBIC: "bicubic", - } - mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] - align_corners = None if mode == "nearest" else False - img = F.interpolate( - img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners - ) - shape[:2] = (self.new_h, self.new_w) - ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) - - return ret - - def apply_coords(self, coords): - coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) - coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) - return coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - def inverse(self): - return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) - -@dataclass -class DataCollatorForKeyValueExtraction: - """ - Data collator that will dynamically pad the inputs received, as well as the labels. - - Args: - tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`): - The tokenizer used for encoding the data. - padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`True`): - Select a strategy to pad the returned sequences (according to the model's padding side and padding index) - among: - - * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single - sequence if provided). - * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the - maximum acceptable input length for the model if that argument is not provided. - * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of - different lengths). 
- max_length (:obj:`int`, `optional`): - Maximum length of the returned list and optionally padding length (see above). - pad_to_multiple_of (:obj:`int`, `optional`): - If set will pad the sequence to a multiple of the provided value. - - This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= - 7.5 (Volta). - label_pad_token_id (:obj:`int`, `optional`, defaults to -100): - The id to use when padding the labels (-100 will be automatically ignore by PyTorch loss functions). - """ - - tokenizer: PreTrainedTokenizerBase - padding: Union[bool, str, PaddingStrategy] = True - max_length: Optional[int] = None - pad_to_multiple_of: Optional[int] = None - label_pad_token_id: int = -100 - - def __call__(self, features): - label_name = "label" if "label" in features[0].keys() else "labels" - labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None - - has_image_input = "image" in features[0] - has_bbox_input = "bbox" in features[0] - if has_image_input: - image = ImageList.from_tensors([torch.tensor(feature["image"]) for feature in features], 32) - for feature in features: - del feature["image"] - batch = self.tokenizer.pad( - features, - padding=self.padding, - max_length=self.max_length, - pad_to_multiple_of=self.pad_to_multiple_of, - # Conversion to tensors will fail if we have labels as they are not of the same length yet. - return_tensors="pt" if labels is None else None, - ) - - if labels is None: - return batch - - sequence_length = torch.tensor(batch["input_ids"]).shape[1] - padding_side = self.tokenizer.padding_side - if padding_side == "right": - batch["labels"] = [label + [self.label_pad_token_id] * (sequence_length - len(label)) for label in labels] - if has_bbox_input: - batch["bbox"] = [bbox + [[0, 0, 0, 0]] * (sequence_length - len(bbox)) for bbox in batch["bbox"]] - else: - batch["labels"] = [[self.label_pad_token_id] * (sequence_length - len(label)) + label for label in labels] - if has_bbox_input: - batch["bbox"] = [[[0, 0, 0, 0]] * (sequence_length - len(bbox)) + bbox for bbox in batch["bbox"]] - - batch = {k: torch.tensor(v, dtype=torch.int64) if isinstance(v[0], list) else v for k, v in batch.items()} - if has_image_input: - batch["image"] = image - return batch - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. 
- """ - - task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field( - default=None, metadata={"help": "The input training data file (a csv or JSON file)."} - ) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_val_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." - }, - ) - max_test_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of test examples to this " - "value if set." - }, - ) - label_all_tokens: bool = field( - default=False, - metadata={ - "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " - "one (in which case the other tokens will have a padding index)." - }, - ) - return_entity_level_metrics: bool = field( - default=False, - metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, - ) - - -@dataclass -class XFUNDataTrainingArguments(DataTrainingArguments): - lang: Optional[str] = field(default="en") - additional_langs: Optional[str] = field(default=None) - -def normalize_bbox(bbox, size): - return [ - int(1000 * bbox[0] / size[0]), - int(1000 * bbox[1] / size[1]), - int(1000 * bbox[2] / size[0]), - int(1000 * bbox[3] / size[1]), - ] - -def load_image(image_path): - image = read_image(image_path, format="BGR") - h = image.shape[0] - w = image.shape[1] - img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) - image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable +# Copyright (c) Facebook, Inc. and its affiliates. 
+from __future__ import division +from typing import Any, Dict, List, Optional, Tuple +import torch +from torch import device +from torch.nn import functional as F +from dataclasses import dataclass, field +from typing import Optional, Union +from transformers import PreTrainedTokenizerBase +from transformers.file_utils import PaddingStrategy +import numpy as np +from PIL import Image + +from fvcore.transforms.transform import TransformList, Transform + +from iopath.common.file_io import PathManager as PathManagerBase +PathManager = PathManagerBase() + +# https://en.wikipedia.org/wiki/YUV#SDTV_with_BT.601 +_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] + +# https://www.exiv2.org/tags.html +_EXIF_ORIENT = 274 # exif 'Orientation' tag + + +@torch.jit.script_if_tracing +def move_device_like(src: torch.Tensor, dst: torch.Tensor) -> torch.Tensor: + """ + Tracing friendly way to cast tensor to another tensor's device. Device will be treated + as constant during tracing, scripting the casting process as whole can workaround this issue. + """ + return src.to(dst.device) + +def shapes_to_tensor(x: List[int], device: Optional[torch.device] = None) -> torch.Tensor: + """ + Turn a list of integer scalars or integer Tensor scalars into a vector, + in a way that's both traceable and scriptable. + + In tracing, `x` should be a list of scalar Tensor, so the output can trace to the inputs. + In scripting or eager, `x` should be a list of int. + """ + if torch.jit.is_scripting(): + return torch.as_tensor(x, device=device) + if torch.jit.is_tracing(): + assert all( + [isinstance(t, torch.Tensor) for t in x] + ), "Shape should be tensor during tracing!" + # as_tensor should not be used in tracing because it records a constant + ret = torch.stack(x) + if ret.device != device: # avoid recording a hard-coded device if not necessary + ret = ret.to(device=device) + return ret + return torch.as_tensor(x, device=device) + +class ImageList(object): + """ + Structure that holds a list of images (of possibly + varying sizes) as a single tensor. + This works by padding the images to the same size. + The original sizes of each image is stored in `image_sizes`. + + Attributes: + image_sizes (list[tuple[int, int]]): each tuple is (h, w). + During tracing, it becomes list[Tensor] instead. + """ + + def __init__(self, tensor: torch.Tensor, image_sizes: List[Tuple[int, int]]): + """ + Arguments: + tensor (Tensor): of shape (N, H, W) or (N, C_1, ..., C_K, H, W) where K >= 1 + image_sizes (list[tuple[int, int]]): Each tuple is (h, w). It can + be smaller than (H, W) due to padding. + """ + self.tensor = tensor + self.image_sizes = image_sizes + + def __len__(self) -> int: + return len(self.image_sizes) + + def __getitem__(self, idx) -> torch.Tensor: + """ + Access the individual image in its original size. 
+ + Args: + idx: int or slice + + Returns: + Tensor: an image of shape (H, W) or (C_1, ..., C_K, H, W) where K >= 1 + """ + size = self.image_sizes[idx] + return self.tensor[idx, ..., : size[0], : size[1]] + + @torch.jit.unused + def to(self, *args: Any, **kwargs: Any) -> "ImageList": + cast_tensor = self.tensor.to(*args, **kwargs) + return ImageList(cast_tensor, self.image_sizes) + + @property + def device(self) -> device: + return self.tensor.device + + @staticmethod + def from_tensors( + tensors: List[torch.Tensor], + size_divisibility: int = 0, + pad_value: float = 0.0, + padding_constraints: Optional[Dict[str, int]] = None, + ) -> "ImageList": + """ + Args: + tensors: a tuple or list of `torch.Tensor`, each of shape (Hi, Wi) or + (C_1, ..., C_K, Hi, Wi) where K >= 1. The Tensors will be padded + to the same shape with `pad_value`. + size_divisibility (int): If `size_divisibility > 0`, add padding to ensure + the common height and width is divisible by `size_divisibility`. + This depends on the model and many models need a divisibility of 32. + pad_value (float): value to pad. + padding_constraints (optional[Dict]): If given, it would follow the format as + {"size_divisibility": int, "square_size": int}, where `size_divisibility` will + overwrite the above one if presented and `square_size` indicates the + square padding size if `square_size` > 0. + Returns: + an `ImageList`. + """ + assert len(tensors) > 0 + assert isinstance(tensors, (tuple, list)) + for t in tensors: + assert isinstance(t, torch.Tensor), type(t) + assert t.shape[:-2] == tensors[0].shape[:-2], t.shape + + image_sizes = [(im.shape[-2], im.shape[-1]) for im in tensors] + image_sizes_tensor = [shapes_to_tensor(x) for x in image_sizes] + max_size = torch.stack(image_sizes_tensor).max(0).values + + if padding_constraints is not None: + square_size = padding_constraints.get("square_size", 0) + if square_size > 0: + # pad to square. + max_size[0] = max_size[1] = square_size + if "size_divisibility" in padding_constraints: + size_divisibility = padding_constraints["size_divisibility"] + if size_divisibility > 1: + stride = size_divisibility + # the last two dims are H,W, both subject to divisibility requirement + max_size = (max_size + (stride - 1)).div(stride, rounding_mode="floor") * stride + + # handle weirdness of scripting and tracing ... + if torch.jit.is_scripting(): + max_size: List[int] = max_size.to(dtype=torch.long).tolist() + else: + if torch.jit.is_tracing(): + image_sizes = image_sizes_tensor + + if len(tensors) == 1: + # This seems slightly (2%) faster. 
+ # TODO: check whether it's faster for multiple images as well + image_size = image_sizes[0] + padding_size = [0, max_size[-1] - image_size[1], 0, max_size[-2] - image_size[0]] + batched_imgs = F.pad(tensors[0], padding_size, value=pad_value).unsqueeze_(0) + else: + # max_size can be a tensor in tracing mode, therefore convert to list + batch_shape = [len(tensors)] + list(tensors[0].shape[:-2]) + list(max_size) + device = ( + None if torch.jit.is_scripting() else ("cpu" if torch.jit.is_tracing() else None) + ) + batched_imgs = tensors[0].new_full(batch_shape, pad_value, device=device) + batched_imgs = move_device_like(batched_imgs, tensors[0]) + for i, img in enumerate(tensors): + # Use `batched_imgs` directly instead of `img, pad_img = zip(tensors, batched_imgs)` + # Tracing mode cannot capture `copy_()` of temporary locals + batched_imgs[i, ..., : img.shape[-2], : img.shape[-1]].copy_(img) + + return ImageList(batched_imgs.contiguous(), image_sizes) + +def convert_PIL_to_numpy(image, format): + """ + Convert PIL image to numpy array of target format. + + Args: + image (PIL.Image): a PIL image + format (str): the format of output image + + Returns: + (np.ndarray): also see `read_image` + """ + if format is not None: + # PIL only supports RGB, so convert to RGB and flip channels over below + conversion_format = format + if format in ["BGR", "YUV-BT.601"]: + conversion_format = "RGB" + image = image.convert(conversion_format) + image = np.asarray(image) + # PIL squeezes out the channel dimension for "L", so make it HWC + if format == "L": + image = np.expand_dims(image, -1) + + # handle formats not supported by PIL + elif format == "BGR": + # flip channels if needed + image = image[:, :, ::-1] + elif format == "YUV-BT.601": + image = image / 255.0 + image = np.dot(image, np.array(_M_RGB2YUV).T) + + return image + +def _apply_exif_orientation(image): + """ + Applies the exif orientation correctly. + + This code exists per the bug: + https://github.com/python-pillow/Pillow/issues/3973 + with the function `ImageOps.exif_transpose`. The Pillow source raises errors with + various methods, especially `tobytes` + + Function based on: + https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 + https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 + + Args: + image (PIL.Image): a PIL image + + Returns: + (PIL.Image): the PIL image with exif orientation applied, if applicable + """ + if not hasattr(image, "getexif"): + return image + + try: + exif = image.getexif() + except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 + exif = None + + if exif is None: + return image + + orientation = exif.get(_EXIF_ORIENT) + + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + + if method is not None: + return image.transpose(method) + return image + +def read_image(file_name, format=None): + """ + Read an image into the given format. + Will apply rotation and flipping if the image has such exif information. + + Args: + file_name (str): image file path + format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". + + Returns: + image (np.ndarray): + an HWC image in the given format, which is 0-255, uint8 for + supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. 
+ """ + with PathManager.open(file_name, "rb") as f: + image = Image.open(f) + + # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 + image = _apply_exif_orientation(image) + return convert_PIL_to_numpy(image, format) + +class ResizeTransform(Transform): + """ + Resize the image to a target size. + """ + + def __init__(self, h, w, new_h, new_w, interp=None): + """ + Args: + h, w (int): original image size + new_h, new_w (int): new image size + interp: PIL interpolation methods, defaults to bilinear. + """ + # TODO decide on PIL vs opencv + super().__init__() + if interp is None: + interp = Image.BILINEAR + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + assert img.shape[:2] == (self.h, self.w) + assert len(img.shape) <= 4 + interp_method = interp if interp is not None else self.interp + + if img.dtype == np.uint8: + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + else: + # PIL only supports uint8 + if any(x < 0 for x in img.strides): + img = np.ascontiguousarray(img) + img = torch.from_numpy(img) + shape = list(img.shape) + shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] + img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw + _PIL_RESIZE_TO_INTERPOLATE_MODE = { + Image.NEAREST: "nearest", + Image.BILINEAR: "bilinear", + Image.BICUBIC: "bicubic", + } + mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] + align_corners = None if mode == "nearest" else False + img = F.interpolate( + img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners + ) + shape[:2] = (self.new_h, self.new_w) + ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) + + return ret + + def apply_coords(self, coords): + coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) + coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) + return coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + def inverse(self): + return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) + +@dataclass +class DataCollatorForKeyValueExtraction: + """ + Data collator that will dynamically pad the inputs received, as well as the labels. + + Args: + tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`): + The tokenizer used for encoding the data. + padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`True`): + Select a strategy to pad the returned sequences (according to the model's padding side and padding index) + among: + + * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single + sequence if provided). + * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the + maximum acceptable input length for the model if that argument is not provided. + * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of + different lengths). + max_length (:obj:`int`, `optional`): + Maximum length of the returned list and optionally padding length (see above). 
+ pad_to_multiple_of (:obj:`int`, `optional`): + If set will pad the sequence to a multiple of the provided value. + + This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= + 7.5 (Volta). + label_pad_token_id (:obj:`int`, `optional`, defaults to -100): + The id to use when padding the labels (-100 will be automatically ignore by PyTorch loss functions). + """ + + tokenizer: PreTrainedTokenizerBase + padding: Union[bool, str, PaddingStrategy] = True + max_length: Optional[int] = None + pad_to_multiple_of: Optional[int] = None + label_pad_token_id: int = -100 + + def __call__(self, features): + label_name = "label" if "label" in features[0].keys() else "labels" + labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None + + has_image_input = "image" in features[0] + has_bbox_input = "bbox" in features[0] + if has_image_input: + image = ImageList.from_tensors([torch.tensor(feature["image"]) for feature in features], 32) + for feature in features: + del feature["image"] + batch = self.tokenizer.pad( + features, + padding=self.padding, + max_length=self.max_length, + pad_to_multiple_of=self.pad_to_multiple_of, + # Conversion to tensors will fail if we have labels as they are not of the same length yet. + return_tensors="pt" if labels is None else None, + ) + + if labels is None: + return batch + + sequence_length = torch.tensor(batch["input_ids"]).shape[1] + padding_side = self.tokenizer.padding_side + if padding_side == "right": + batch["labels"] = [label + [self.label_pad_token_id] * (sequence_length - len(label)) for label in labels] + if has_bbox_input: + batch["bbox"] = [bbox + [[0, 0, 0, 0]] * (sequence_length - len(bbox)) for bbox in batch["bbox"]] + else: + batch["labels"] = [[self.label_pad_token_id] * (sequence_length - len(label)) + label for label in labels] + if has_bbox_input: + batch["bbox"] = [[[0, 0, 0, 0]] * (sequence_length - len(bbox)) + bbox for bbox in batch["bbox"]] + + batch = {k: torch.tensor(v, dtype=torch.int64) if isinstance(v[0], list) else v for k, v in batch.items()} + if has_image_input: + batch["image"] = image + return batch + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field( + default=None, metadata={"help": "The input training data file (a csv or JSON file)."} + ) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_val_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + label_all_tokens: bool = field( + default=False, + metadata={ + "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." 
+ }, + ) + return_entity_level_metrics: bool = field( + default=False, + metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, + ) + + +@dataclass +class XFUNDataTrainingArguments(DataTrainingArguments): + lang: Optional[str] = field(default="en") + additional_langs: Optional[str] = field(default=None) + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + +def load_image(image_path): + image = read_image(image_path, format="BGR") + h = image.shape[0] + w = image.shape[1] + img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) + image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable return image, (w, h) \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/funsd.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/funsd.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/funsd.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/funsd.py index 6e9aa23e8c0..9f8af99d49c 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/funsd.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/funsd.py @@ -1,135 +1,135 @@ -# coding=utf-8 -# Adapted from https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py - -import json -import os - -import datasets - -from PIL import Image -import numpy as np - -logger = datasets.logging.get_logger(__name__) - - -_CITATION = """\ -@article{Jaume2019FUNSDAD, - title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, - author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, - journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, - year={2019}, - volume={2}, - pages={1-6} -} -""" -_DESCRIPTION = """\ -https://guillaumejaume.github.io/FUNSD/ -""" - - -def load_image(image_path): - image = Image.open(image_path).convert("RGB") - w, h = image.size - return image, (w, h) - - -def normalize_bbox(bbox, size): - return [ - int(1000 * bbox[0] / size[0]), - int(1000 * bbox[1] / size[1]), - int(1000 * bbox[2] / size[0]), - int(1000 * bbox[3] / size[1]), - ] - - -class FunsdConfig(datasets.BuilderConfig): - """BuilderConfig for FUNSD""" - - def __init__(self, **kwargs): - """BuilderConfig for FUNSD. - - Args: - **kwargs: keyword arguments forwarded to super. 
- """ - super(FunsdConfig, self).__init__(**kwargs) - - -class Funsd(datasets.GeneratorBasedBuilder): - """FUNSD dataset.""" - - BUILDER_CONFIGS = [ - FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "words": datasets.Sequence(datasets.Value("string")), - "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] - ) - ), - "image_path": datasets.Value("string"), - } - ), - supervised_keys=None, - homepage="https://guillaumejaume.github.io/FUNSD/", - citation=_CITATION, - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} - ), - ] - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - ann_dir = os.path.join(filepath, "annotations") - img_dir = os.path.join(filepath, "images") - for guid, file in enumerate(sorted(os.listdir(ann_dir))): - words = [] - bboxes = [] - ner_tags = [] - file_path = os.path.join(ann_dir, file) - with open(file_path, "r", encoding="utf8") as f: - data = json.load(f) - image_path = os.path.join(img_dir, file) - image_path = image_path.replace("json", "png") - image, size = load_image(image_path) - for item in data["form"]: - words_example, label = item["words"], item["label"] - words_example = [w for w in words_example if w["text"].strip() != ""] - if len(words_example) == 0: - continue - if label == "other": - for w in words_example: - words.append(w["text"]) - ner_tags.append("O") - bboxes.append(normalize_bbox(w["box"], size)) - else: - words.append(words_example[0]["text"]) - ner_tags.append("B-" + label.upper()) - bboxes.append(normalize_bbox(words_example[0]["box"], size)) - for w in words_example[1:]: - words.append(w["text"]) - ner_tags.append("I-" + label.upper()) - bboxes.append(normalize_bbox(w["box"], size)) - yield guid, { - "id": str(guid), - "words": words, - "bboxes": bboxes, - "ner_tags": ner_tags, - "image_path": image_path, - } +# coding=utf-8 +# Adapted from https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py + +import json +import os + +import datasets + +from PIL import Image +import numpy as np + +logger = datasets.logging.get_logger(__name__) + + +_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. 
Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +def load_image(image_path): + image = Image.open(image_path).convert("RGB") + w, h = image.size + return image, (w, h) + + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. + """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """FUNSD dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "words": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image_path": datasets.Value("string"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + words = [] + bboxes = [] + ner_tags = [] + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + words_example, label = item["words"], item["label"] + words_example = [w for w in words_example if w["text"].strip() != ""] + if len(words_example) == 0: + continue + if label == "other": + for w in words_example: + words.append(w["text"]) + ner_tags.append("O") + bboxes.append(normalize_bbox(w["box"], size)) + else: + words.append(words_example[0]["text"]) + ner_tags.append("B-" + label.upper()) + bboxes.append(normalize_bbox(words_example[0]["box"], size)) + for w in words_example[1:]: + words.append(w["text"]) + ner_tags.append("I-" + label.upper()) + bboxes.append(normalize_bbox(w["box"], size)) + yield guid, { + "id": str(guid), + "words": words, + "bboxes": bboxes, + "ner_tags": ner_tags, + "image_path": image_path, + } diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/image_utils.py 
b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/image_utils.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/image_utils.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/image_utils.py index c2bffd50b0b..a4a0756a1e4 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/image_utils.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/image_utils.py @@ -1,271 +1,271 @@ -# Adapted from https://github.com/microsoft/unilm/blob/master/layoutlmft/layoutlmft/data/utils.py -import torchvision.transforms.functional as F -import warnings -import math -import random -import numpy as np -from PIL import Image -import torch - - -def crop(image, i, j, h, w, boxes=None): - cropped_image = F.crop(image, i, j, h, w) - - if boxes is not None: - # Currently we cannot use this case since when some boxes is out of the cropped image, - # it may be better to drop out these boxes along with their text input (instead of min or clamp) - # which haven't been implemented here - max_size = torch.as_tensor([w, h], dtype=torch.float32) - cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) - cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) - cropped_boxes = cropped_boxes.clamp(min=0) - boxes = cropped_boxes.reshape(-1, 4) - - return cropped_image, boxes - - -def resize(image, size, interpolation, boxes=None): - # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, - # which is compatible with a square image size of 224x224 - rescaled_image = F.resize(image, size, interpolation) - - if boxes is None: - return rescaled_image, None - - ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) - ratio_width, ratio_height = ratios - - # boxes = boxes.copy() - scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) - - return rescaled_image, scaled_boxes - - -def clamp(num, min_value, max_value): - return max(min(num, max_value), min_value) - - -def get_bb(bb, page_size): - bbs = [float(j) for j in bb] - xs, ys = [], [] - for i, b in enumerate(bbs): - if i % 2 == 0: - xs.append(b) - else: - ys.append(b) - (width, height) = page_size - return_bb = [ - clamp(min(xs), 0, width - 1), - clamp(min(ys), 0, height - 1), - clamp(max(xs), 0, width - 1), - clamp(max(ys), 0, height - 1), - ] - return_bb = [ - int(1000 * return_bb[0] / width), - int(1000 * return_bb[1] / height), - int(1000 * return_bb[2] / width), - int(1000 * return_bb[3] / height), - ] - return return_bb - - -class ToNumpy: - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return np_img - - -class ToTensor: - def __init__(self, dtype=torch.float32): - self.dtype = dtype - - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return torch.from_numpy(np_img).to(dtype=self.dtype) - - -_pil_interpolation_to_str = { - F.InterpolationMode.NEAREST: "F.InterpolationMode.NEAREST", - F.InterpolationMode.BILINEAR: 
"F.InterpolationMode.BILINEAR", - F.InterpolationMode.BICUBIC: "F.InterpolationMode.BICUBIC", - F.InterpolationMode.LANCZOS: "F.InterpolationMode.LANCZOS", - F.InterpolationMode.HAMMING: "F.InterpolationMode.HAMMING", - F.InterpolationMode.BOX: "F.InterpolationMode.BOX", -} - - -def _pil_interp(method): - if method == "bicubic": - return F.InterpolationMode.BICUBIC - elif method == "lanczos": - return F.InterpolationMode.LANCZOS - elif method == "hamming": - return F.InterpolationMode.HAMMING - else: - # default bilinear, do we want to allow nearest? - return F.InterpolationMode.BILINEAR - - -class Compose: - """Composes several transforms together. This transform does not support torchscript. - Please, see the note below. - - Args: - transforms (list of ``Transform`` objects): list of transforms to compose. - - Example: - >>> transforms.Compose([ - >>> transforms.CenterCrop(10), - >>> transforms.PILToTensor(), - >>> transforms.ConvertImageDtype(torch.float), - >>> ]) - - .. note:: - In order to script the transformations, please use ``torch.nn.Sequential`` as below. - - >>> transforms = torch.nn.Sequential( - >>> transforms.CenterCrop(10), - >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - >>> ) - >>> scripted_transforms = torch.jit.script(transforms) - - Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require - `lambda` functions or ``PIL.Image``. - - """ - - def __init__(self, transforms): - self.transforms = transforms - - def __call__(self, img, augmentation=False, box=None): - for t in self.transforms: - img = t(img, augmentation, box) - return img - - -class RandomResizedCropAndInterpolationWithTwoPic: - """Crop the given PIL Image to random size and aspect ratio with random interpolation. - A crop of random size (default: of 0.08 to 1.0) of the original size and a random - aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop - is finally resized to given size. - This is popularly used to train the Inception networks. - Args: - size: expected output size of each edge - scale: range of size of the origin size cropped - ratio: range of aspect ratio of the origin aspect ratio cropped - interpolation: Default: PIL.Image.BILINEAR - """ - - def __init__( - self, - size, - second_size=None, - scale=(0.08, 1.0), - ratio=(3.0 / 4.0, 4.0 / 3.0), - interpolation="bilinear", - second_interpolation="lanczos", - ): - if isinstance(size, tuple): - self.size = size - else: - self.size = (size, size) - if second_size is not None: - if isinstance(second_size, tuple): - self.second_size = second_size - else: - self.second_size = (second_size, second_size) - else: - self.second_size = None - if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): - warnings.warn("range should be of kind (min, max)") - - self.interpolation = _pil_interp(interpolation) - self.second_interpolation = _pil_interp(second_interpolation) - self.scale = scale - self.ratio = ratio - - @staticmethod - def get_params(img, scale, ratio): - """Get parameters for ``crop`` for a random sized crop. - Args: - img (PIL Image): Image to be cropped. - scale (tuple): range of size of the origin size cropped - ratio (tuple): range of aspect ratio of the origin aspect ratio cropped - Returns: - tuple: params (i, j, h, w) to be passed to ``crop`` for a random - sized crop. 
- """ - area = img.size[0] * img.size[1] - - for attempt in range(10): - target_area = random.uniform(*scale) * area - log_ratio = (math.log(ratio[0]), math.log(ratio[1])) - aspect_ratio = math.exp(random.uniform(*log_ratio)) - - w = int(round(math.sqrt(target_area * aspect_ratio))) - h = int(round(math.sqrt(target_area / aspect_ratio))) - - if w <= img.size[0] and h <= img.size[1]: - i = random.randint(0, img.size[1] - h) - j = random.randint(0, img.size[0] - w) - return i, j, h, w - - # Fallback to central crop - in_ratio = img.size[0] / img.size[1] - if in_ratio < min(ratio): - w = img.size[0] - h = int(round(w / min(ratio))) - elif in_ratio > max(ratio): - h = img.size[1] - w = int(round(h * max(ratio))) - else: # whole image - w = img.size[0] - h = img.size[1] - i = (img.size[1] - h) // 2 - j = (img.size[0] - w) // 2 - return i, j, h, w - - def __call__(self, img, augmentation=False, box=None): - """ - Args: - img (PIL Image): Image to be cropped and resized. - Returns: - PIL Image: Randomly cropped and resized image. - """ - if augmentation: - i, j, h, w = self.get_params(img, self.scale, self.ratio) - img = F.crop(img, i, j, h, w) - # img, box = crop(img, i, j, h, w, box) - img = F.resize(img, self.size, self.interpolation) - second_img = ( - F.resize(img, self.second_size, self.second_interpolation) if self.second_size is not None else None - ) - return img, second_img - - def __repr__(self): - if isinstance(self.interpolation, (tuple, list)): - interpolate_str = " ".join([_pil_interpolation_to_str[x] for x in self.interpolation]) - else: - interpolate_str = _pil_interpolation_to_str[self.interpolation] - format_string = self.__class__.__name__ + "(size={0}".format(self.size) - format_string += ", scale={0}".format(tuple(round(s, 4) for s in self.scale)) - format_string += ", ratio={0}".format(tuple(round(r, 4) for r in self.ratio)) - format_string += ", interpolation={0}".format(interpolate_str) - if self.second_size is not None: - format_string += ", second_size={0}".format(self.second_size) - format_string += ", second_interpolation={0}".format(_pil_interpolation_to_str[self.second_interpolation]) - format_string += ")" - return format_string - - -def pil_loader(path: str) -> Image.Image: - # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) - with open(path, "rb") as f: - img = Image.open(f) - return img.convert("RGB") +# Adapted from https://github.com/microsoft/unilm/blob/master/layoutlmft/layoutlmft/data/utils.py +import torchvision.transforms.functional as F +import warnings +import math +import random +import numpy as np +from PIL import Image +import torch + + +def crop(image, i, j, h, w, boxes=None): + cropped_image = F.crop(image, i, j, h, w) + + if boxes is not None: + # Currently we cannot use this case since when some boxes is out of the cropped image, + # it may be better to drop out these boxes along with their text input (instead of min or clamp) + # which haven't been implemented here + max_size = torch.as_tensor([w, h], dtype=torch.float32) + cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) + cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) + cropped_boxes = cropped_boxes.clamp(min=0) + boxes = cropped_boxes.reshape(-1, 4) + + return cropped_image, boxes + + +def resize(image, size, interpolation, boxes=None): + # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, + # which is compatible with a square image size of 224x224 
+ rescaled_image = F.resize(image, size, interpolation) + + if boxes is None: + return rescaled_image, None + + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) + ratio_width, ratio_height = ratios + + # boxes = boxes.copy() + scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) + + return rescaled_image, scaled_boxes + + +def clamp(num, min_value, max_value): + return max(min(num, max_value), min_value) + + +def get_bb(bb, page_size): + bbs = [float(j) for j in bb] + xs, ys = [], [] + for i, b in enumerate(bbs): + if i % 2 == 0: + xs.append(b) + else: + ys.append(b) + (width, height) = page_size + return_bb = [ + clamp(min(xs), 0, width - 1), + clamp(min(ys), 0, height - 1), + clamp(max(xs), 0, width - 1), + clamp(max(ys), 0, height - 1), + ] + return_bb = [ + int(1000 * return_bb[0] / width), + int(1000 * return_bb[1] / height), + int(1000 * return_bb[2] / width), + int(1000 * return_bb[3] / height), + ] + return return_bb + + +class ToNumpy: + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return np_img + + +class ToTensor: + def __init__(self, dtype=torch.float32): + self.dtype = dtype + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return torch.from_numpy(np_img).to(dtype=self.dtype) + + +_pil_interpolation_to_str = { + F.InterpolationMode.NEAREST: "F.InterpolationMode.NEAREST", + F.InterpolationMode.BILINEAR: "F.InterpolationMode.BILINEAR", + F.InterpolationMode.BICUBIC: "F.InterpolationMode.BICUBIC", + F.InterpolationMode.LANCZOS: "F.InterpolationMode.LANCZOS", + F.InterpolationMode.HAMMING: "F.InterpolationMode.HAMMING", + F.InterpolationMode.BOX: "F.InterpolationMode.BOX", +} + + +def _pil_interp(method): + if method == "bicubic": + return F.InterpolationMode.BICUBIC + elif method == "lanczos": + return F.InterpolationMode.LANCZOS + elif method == "hamming": + return F.InterpolationMode.HAMMING + else: + # default bilinear, do we want to allow nearest? + return F.InterpolationMode.BILINEAR + + +class Compose: + """Composes several transforms together. This transform does not support torchscript. + Please, see the note below. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> transforms.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.PILToTensor(), + >>> transforms.ConvertImageDtype(torch.float), + >>> ]) + + .. note:: + In order to script the transformations, please use ``torch.nn.Sequential`` as below. + + >>> transforms = torch.nn.Sequential( + >>> transforms.CenterCrop(10), + >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + >>> ) + >>> scripted_transforms = torch.jit.script(transforms) + + Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require + `lambda` functions or ``PIL.Image``. + + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, augmentation=False, box=None): + for t in self.transforms: + img = t(img, augmentation, box) + return img + + +class RandomResizedCropAndInterpolationWithTwoPic: + """Crop the given PIL Image to random size and aspect ratio with random interpolation. 
+ A crop of random size (default: of 0.08 to 1.0) of the original size and a random + aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop + is finally resized to given size. + This is popularly used to train the Inception networks. + Args: + size: expected output size of each edge + scale: range of size of the origin size cropped + ratio: range of aspect ratio of the origin aspect ratio cropped + interpolation: Default: PIL.Image.BILINEAR + """ + + def __init__( + self, + size, + second_size=None, + scale=(0.08, 1.0), + ratio=(3.0 / 4.0, 4.0 / 3.0), + interpolation="bilinear", + second_interpolation="lanczos", + ): + if isinstance(size, tuple): + self.size = size + else: + self.size = (size, size) + if second_size is not None: + if isinstance(second_size, tuple): + self.second_size = second_size + else: + self.second_size = (second_size, second_size) + else: + self.second_size = None + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("range should be of kind (min, max)") + + self.interpolation = _pil_interp(interpolation) + self.second_interpolation = _pil_interp(second_interpolation) + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img, scale, ratio): + """Get parameters for ``crop`` for a random sized crop. + Args: + img (PIL Image): Image to be cropped. + scale (tuple): range of size of the origin size cropped + ratio (tuple): range of aspect ratio of the origin aspect ratio cropped + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. + """ + area = img.size[0] * img.size[1] + + for attempt in range(10): + target_area = random.uniform(*scale) * area + log_ratio = (math.log(ratio[0]), math.log(ratio[1])) + aspect_ratio = math.exp(random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if w <= img.size[0] and h <= img.size[1]: + i = random.randint(0, img.size[1] - h) + j = random.randint(0, img.size[0] - w) + return i, j, h, w + + # Fallback to central crop + in_ratio = img.size[0] / img.size[1] + if in_ratio < min(ratio): + w = img.size[0] + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = img.size[1] + w = int(round(h * max(ratio))) + else: # whole image + w = img.size[0] + h = img.size[1] + i = (img.size[1] - h) // 2 + j = (img.size[0] - w) // 2 + return i, j, h, w + + def __call__(self, img, augmentation=False, box=None): + """ + Args: + img (PIL Image): Image to be cropped and resized. + Returns: + PIL Image: Randomly cropped and resized image. 
+ """ + if augmentation: + i, j, h, w = self.get_params(img, self.scale, self.ratio) + img = F.crop(img, i, j, h, w) + # img, box = crop(img, i, j, h, w, box) + img = F.resize(img, self.size, self.interpolation) + second_img = ( + F.resize(img, self.second_size, self.second_interpolation) if self.second_size is not None else None + ) + return img, second_img + + def __repr__(self): + if isinstance(self.interpolation, (tuple, list)): + interpolate_str = " ".join([_pil_interpolation_to_str[x] for x in self.interpolation]) + else: + interpolate_str = _pil_interpolation_to_str[self.interpolation] + format_string = self.__class__.__name__ + "(size={0}".format(self.size) + format_string += ", scale={0}".format(tuple(round(s, 4) for s in self.scale)) + format_string += ", ratio={0}".format(tuple(round(r, 4) for r in self.ratio)) + format_string += ", interpolation={0}".format(interpolate_str) + if self.second_size is not None: + format_string += ", second_size={0}".format(self.second_size) + format_string += ", second_interpolation={0}".format(_pil_interpolation_to_str[self.second_interpolation]) + format_string += ")" + return format_string + + +def pil_loader(path: str) -> Image.Image: + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, "rb") as f: + img = Image.open(f) + return img.convert("RGB") diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py index c7cf936270d..460251be8bd 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/main.py @@ -1,562 +1,562 @@ -#!/usr/bin/env python -# coding=utf-8 -from image_utils import ( - Compose, - RandomResizedCropAndInterpolationWithTwoPic, - pil_loader, -) -from torchvision import transforms -from timm.data.constants import ( - IMAGENET_DEFAULT_MEAN, - IMAGENET_DEFAULT_STD, - IMAGENET_INCEPTION_MEAN, - IMAGENET_INCEPTION_STD, -) -from neural_compressor.data import DataLoader -import torch -import onnxruntime -import onnx -import logging -import os -import sys -from dataclasses import dataclass, field -from typing import Optional - -import numpy as np -import transformers -from datasets import ClassLabel, load_dataset, load_metric -from transformers import ( - AutoConfig, - AutoModelForTokenClassification, - AutoTokenizer, - HfArgumentParser, - PreTrainedTokenizerFast, - Trainer, - TrainingArguments, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version - -# Will error if the minimal version of Transformers is not installed. -# Remove at your own risks. -check_min_version("4.5.0") - - -logger = logging.getLogger(__name__) - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
- """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, - metadata={"help": "Pretrained config name or path if not the same as model_name"}, - ) - tokenizer_name: Optional[str] = field( - default=None, - metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}, - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." - }, - ) - input_model: str = field(default=None, metadata={"help": "Path to onnx model"}) - tune: bool = field( - default=False, - metadata={"help": ("INC tune")}, - ) - benchmark: bool = field( - default=False, - metadata={"help": ("INC benchmark")}, - ) - mode: str = field( - default="performance", - metadata={"help": ("INC benchmark mode")}, - ) - save_path: str = field( - default=None, - metadata={"help": ("onnx int8 model path")}, - ) - batch_size: int = field( - default=1, - metadata={"help": ("batch size for benchmark")}, - ) - quant_format: str = field( - default="QOperator", - metadata={"help": ("quant format")}, - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - """ - - task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) - dataset_name: Optional[str] = field( - default="funsd", - metadata={"help": "The name of the dataset to use (via the datasets library)."}, - ) - dataset_config_name: Optional[str] = field( - default=None, - metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}, - ) - train_file: Optional[str] = field( - default=None, - metadata={"help": "The input training data file (a csv or JSON file)."}, - ) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, - ) - overwrite_cache: bool = field( - default=False, - metadata={"help": "Overwrite the cached training and evaluation sets"}, - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." 
- }, - ) - max_val_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." - }, - ) - max_test_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of test examples to this " - "value if set." - }, - ) - label_all_tokens: bool = field( - default=False, - metadata={ - "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " - "one (in which case the other tokens will have a padding index)." - }, - ) - return_entity_level_metrics: bool = field( - default=False, - metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, - ) - segment_level_layout: bool = field(default=True) - visual_embed: bool = field(default=True) - data_dir: Optional[str] = field(default=None) - input_size: int = field(default=224, metadata={"help": "images input size for backbone"}) - second_input_size: int = field(default=112, metadata={"help": "images input size for discrete vae"}) - train_interpolation: str = field( - default="bicubic", - metadata={"help": "Training interpolation (random, bilinear, bicubic)"}, - ) - second_interpolation: str = field( - default="lanczos", - metadata={"help": "Interpolation for discrete vae (random, bilinear, bicubic)"}, - ) - imagenet_default_mean_and_std: bool = field(default=False, metadata={"help": ""}) - - -class IncDataset: - def __init__( - self, - dataset, - model, - label_names=None, - ): - self.dataset = dataset - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession( - model.SerializeToString(), - providers=onnxruntime.get_available_providers(), - ) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self._process_dataset() - - def _process_dataset(self): - self.label = [] - self.onnx_inputs = [] - for inputs in self.dataset: - onnx_inputs = [] - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - self.label.append(labels) - """ - LayoutLMV2 inputs (with order): - { - 'input_ids': {0: 'batch_size', 1: 'sequence_length'}, - 'bbox': {0: 'batch_size', 1: 'sequence_length'}, - 'image': {0: 'batch_size', 1: 'num_channels'}, # dtype is np.int64 not float - 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, - } - """ - for key in self.onnx_input_names: - if key in inputs: - # onnx_inputs[key] = np.array([inputs[key]]) - onnx_inputs.append(np.array(inputs[key])) - # self.onnx_inputs.append([np.array(inputs[key])]) - elif key == "image": - # onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) - onnx_inputs.append(np.array(inputs["images"], dtype=np.int64)) - # self.onnx_inputs.append([np.array(inputs['images'], dtype=np.float32)]) - - self.onnx_inputs.append(onnx_inputs) - onnx_inputs = [] - - def __getitem__(self, index): - return tuple(self.onnx_inputs[index]), self.label[index] - - def __len__(self): - assert len(self.label) == len(self.onnx_inputs) - return len(self.onnx_inputs) - - -def main(): - # See all possible arguments in layoutlmft/transformers/training_args.py - # or by passing the --help flag to this script. 
- # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - ( - model_args, - data_args, - training_args, - ) = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - # Set the verbosity to info of the Transformers logger (on main process only): - if is_main_process(training_args.local_rank): - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - logger.info(f"Training/evaluation parameters {training_args}") - - # Set seed before initializing model. - set_seed(training_args.seed) - - if data_args.dataset_name == "funsd": - # datasets = load_dataset("nielsr/funsd") - import funsd - - datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir) - else: - raise NotImplementedError() - - column_names = datasets["test"].column_names - features = datasets["test"].features - - text_column_name = "words" if "words" in column_names else "tokens" - boxes_column_name = "bboxes" - - label_column_name = ( - f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] - ) - - remove_columns = column_names - - # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the - # unique labels. - def get_label_list(labels): - unique_labels = set() - for label in labels: - unique_labels = unique_labels | set(label) - label_list = sorted(unique_labels) - return label_list - - if isinstance(features[label_column_name].feature, ClassLabel): - label_list = features[label_column_name].feature.names - # No need to convert the labels since they are already ints. - label_to_id = {i: i for i in range(len(label_list))} - else: - label_list = get_label_list(datasets["train"][label_column_name]) - label_to_id = {l: i for i, l in enumerate(label_list)} - num_labels = len(label_list) - - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. 
- config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - num_labels=num_labels, - finetuning_task=data_args.task_name, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - input_size=data_args.input_size, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - tokenizer_file=None, - # avoid loading from a cached file of the pre-trained model in another - # machine - cache_dir=model_args.cache_dir, - use_fast=True, - add_prefix_space=True, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - - # Tokenizer check: this script requires a fast tokenizer. - if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " - "requirement" - ) - - # Preprocessing the dataset - # Padding strategy - padding = "max_length" if data_args.pad_to_max_length else False - - if data_args.visual_embed: - imagenet_default_mean_and_std = data_args.imagenet_default_mean_and_std - mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN - std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD - common_transform = Compose( - [ - # transforms.ColorJitter(0.4, 0.4, 0.4), - # transforms.RandomHorizontalFlip(p=0.5), - RandomResizedCropAndInterpolationWithTwoPic( - size=data_args.input_size, - interpolation=data_args.train_interpolation, - ), - ] - ) - - patch_transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize(mean=torch.tensor(mean), std=torch.tensor(std)), - ] - ) - - # Tokenize all texts and align the labels with them. - def tokenize_and_align_labels(examples, augmentation=False): - tokenized_inputs = tokenizer( - examples[text_column_name], - padding=False, - truncation=True, - return_overflowing_tokens=True, - boxes=examples[boxes_column_name], - ) - - labels = [] - bboxes = [] - images = [] - for batch_index in range(len(tokenized_inputs["input_ids"])): - word_ids = tokenized_inputs.word_ids(batch_index=batch_index) - org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] - - label = examples[label_column_name][org_batch_index] - bbox = examples["bboxes"][org_batch_index] - previous_word_idx = None - label_ids = [] - bbox_inputs = [] - for word_idx in word_ids: - # Special tokens have a word id that is None. We set the label to -100 so they are automatically - # ignored in the loss function. - if word_idx is None: - label_ids.append(-100) - bbox_inputs.append([0, 0, 0, 0]) - # We set the label for the first token of each word. - elif word_idx != previous_word_idx: - label_ids.append(label_to_id[label[word_idx]]) - bbox_inputs.append(bbox[word_idx]) - # For the other tokens in a word, we set the label to either the current label or -100, depending on - # the label_all_tokens flag. 
- else: - label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) - bbox_inputs.append(bbox[word_idx]) - previous_word_idx = word_idx - labels.append(label_ids) - bboxes.append(bbox_inputs) - - if data_args.visual_embed: - ipath = examples["image_path"][org_batch_index] - img = pil_loader(ipath) - for_patches, _ = common_transform(img, augmentation=augmentation) - patch = patch_transform(for_patches) - images.append(patch) - - tokenized_inputs["labels"] = labels - tokenized_inputs["bbox"] = bboxes - if data_args.visual_embed: - tokenized_inputs["images"] = images - - return tokenized_inputs - - validation_name = "test" - if validation_name not in datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_dataset = datasets[validation_name] - if data_args.max_val_samples is not None: - eval_dataset = eval_dataset.select(range(data_args.max_val_samples)) - eval_dataset = eval_dataset.map( - tokenize_and_align_labels, - batched=True, - remove_columns=remove_columns, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) - - # Metrics - metric = load_metric("seqeval") - - def compute_metrics(p): - predictions, labels = p - predictions = np.argmax(predictions, axis=2) - - # Remove ignored index (special tokens) - true_predictions = [ - [label_list[p] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - true_labels = [ - [label_list[l] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - - results = metric.compute(predictions=true_predictions, references=true_labels) - if data_args.return_entity_level_metrics: - # Unpack nested dictionaries - final_results = {} - for key, value in results.items(): - if isinstance(value, dict): - for n, v in value.items(): - final_results[f"{key}_{n}"] = v - else: - final_results[key] = value - return final_results - else: - return { - "precision": results["overall_precision"], - "recall": results["overall_recall"], - "f1": results["overall_f1"], - "accuracy": results["overall_accuracy"], - } - - # Evaluation - from model import ORTModel - - def eval_func(model): - logger.info("*** Evaluate ***") - ort_model = ORTModel( - model, - compute_metrics=compute_metrics, - ) - outputs = ort_model.evaluation_loop(eval_dataset) - return outputs.metrics["f1"] - - if model_args.tune: - from neural_compressor import PostTrainingQuantConfig, quantization - from neural_compressor.utils.constant import FP32 - import onnx - - onnx_model = onnx.load(model_args.input_model) - calib_dataset = IncDataset(eval_dataset, onnx_model) - # TODO double check it for better perf - config = PostTrainingQuantConfig( - approach="dynamic", - quant_level=1, - quant_format=model_args.quant_format, - ) - # recipes={'smooth_quant': True, 'smooth_quant_args': {'alpha': 0.5}}, - # TODO double-check the sq, some issues - q_model = quantization.fit( - onnx_model, - config, - eval_func=eval_func, - calib_dataloader=DataLoader(framework="onnxruntime", dataset=calib_dataset, batch_size=1), - ) - q_model.save(model_args.save_path) - - if model_args.benchmark: - import onnx - onnx_model = onnx.load(model_args.input_model) - if model_args.mode == "performance": - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - - b_dataset = IncDataset(eval_dataset, onnx_model) - conf = BenchmarkConfig( - iteration=100, - cores_per_instance=28, - num_of_instance=1, 
- ) - b_dataloader = DataLoader( - framework="onnxruntime", - dataset=b_dataset, - batch_size=model_args.batch_size, - ) - fit(onnx_model, conf, b_dataloader=b_dataloader) - elif model_args.mode == "accuracy": - eval_f1 = eval_func(onnx_model) - print("Batch size = %d" % model_args.batch_size) - print("Accuracy: %.5f" % eval_f1) - - -if __name__ == "__main__": - main() +#!/usr/bin/env python +# coding=utf-8 +from image_utils import ( + Compose, + RandomResizedCropAndInterpolationWithTwoPic, + pil_loader, +) +from torchvision import transforms +from timm.data.constants import ( + IMAGENET_DEFAULT_MEAN, + IMAGENET_DEFAULT_STD, + IMAGENET_INCEPTION_MEAN, + IMAGENET_INCEPTION_STD, +) +from neural_compressor.data import DataLoader +import torch +import onnxruntime +import onnx +import logging +import os +import sys +from dataclasses import dataclass, field +from typing import Optional + +import numpy as np +import transformers +from datasets import ClassLabel, load_dataset, load_metric +from transformers import ( + AutoConfig, + AutoModelForTokenClassification, + AutoTokenizer, + HfArgumentParser, + PreTrainedTokenizerFast, + Trainer, + TrainingArguments, + set_seed, +) +from transformers.trainer_utils import get_last_checkpoint, is_main_process +from transformers.utils import check_min_version + +# Will error if the minimal version of Transformers is not installed. +# Remove at your own risks. +check_min_version("4.5.0") + + +logger = logging.getLogger(__name__) + + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, + metadata={"help": "Pretrained config name or path if not the same as model_name"}, + ) + tokenizer_name: Optional[str] = field( + default=None, + metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}, + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + input_model: str = field(default=None, metadata={"help": "Path to onnx model"}) + tune: bool = field( + default=False, + metadata={"help": ("INC tune")}, + ) + benchmark: bool = field( + default=False, + metadata={"help": ("INC benchmark")}, + ) + mode: str = field( + default="performance", + metadata={"help": ("INC benchmark mode")}, + ) + save_path: str = field( + default=None, + metadata={"help": ("onnx int8 model path")}, + ) + batch_size: int = field( + default=1, + metadata={"help": ("batch size for benchmark")}, + ) + quant_format: str = field( + default="QOperator", + metadata={"help": ("quant format")}, + ) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) + dataset_name: Optional[str] = field( + default="funsd", + metadata={"help": "The name of the dataset to use (via the datasets library)."}, + ) + dataset_config_name: Optional[str] = field( + default=None, + metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}, + ) + train_file: Optional[str] = field( + default=None, + metadata={"help": "The input training data file (a csv or JSON file)."}, + ) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, + ) + overwrite_cache: bool = field( + default=False, + metadata={"help": "Overwrite the cached training and evaluation sets"}, + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_val_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + label_all_tokens: bool = field( + default=False, + metadata={ + "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." 
+ }, + ) + return_entity_level_metrics: bool = field( + default=False, + metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, + ) + segment_level_layout: bool = field(default=True) + visual_embed: bool = field(default=True) + data_dir: Optional[str] = field(default=None) + input_size: int = field(default=224, metadata={"help": "images input size for backbone"}) + second_input_size: int = field(default=112, metadata={"help": "images input size for discrete vae"}) + train_interpolation: str = field( + default="bicubic", + metadata={"help": "Training interpolation (random, bilinear, bicubic)"}, + ) + second_interpolation: str = field( + default="lanczos", + metadata={"help": "Interpolation for discrete vae (random, bilinear, bicubic)"}, + ) + imagenet_default_mean_and_std: bool = field(default=False, metadata={"help": ""}) + + +class IncDataset: + def __init__( + self, + dataset, + model, + label_names=None, + ): + self.dataset = dataset + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession( + model.SerializeToString(), + providers=onnxruntime.get_available_providers(), + ) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + self._process_dataset() + + def _process_dataset(self): + self.label = [] + self.onnx_inputs = [] + for inputs in self.dataset: + onnx_inputs = [] + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + self.label.append(labels) + """ + LayoutLMV2 inputs (with order): + { + 'input_ids': {0: 'batch_size', 1: 'sequence_length'}, + 'bbox': {0: 'batch_size', 1: 'sequence_length'}, + 'image': {0: 'batch_size', 1: 'num_channels'}, # dtype is np.int64 not float + 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, + } + """ + for key in self.onnx_input_names: + if key in inputs: + # onnx_inputs[key] = np.array([inputs[key]]) + onnx_inputs.append(np.array(inputs[key])) + # self.onnx_inputs.append([np.array(inputs[key])]) + elif key == "image": + # onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) + onnx_inputs.append(np.array(inputs["images"], dtype=np.int64)) + # self.onnx_inputs.append([np.array(inputs['images'], dtype=np.float32)]) + + self.onnx_inputs.append(onnx_inputs) + onnx_inputs = [] + + def __getitem__(self, index): + return tuple(self.onnx_inputs[index]), self.label[index] + + def __len__(self): + assert len(self.label) == len(self.onnx_inputs) + return len(self.onnx_inputs) + + +def main(): + # See all possible arguments in layoutlmft/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
+ model_args, data_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + ( + model_args, + data_args, + training_args, + ) = parser.parse_args_into_dataclasses() + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + # Set the verbosity to info of the Transformers logger (on main process only): + if is_main_process(training_args.local_rank): + transformers.utils.logging.set_verbosity_info() + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + logger.info(f"Training/evaluation parameters {training_args}") + + # Set seed before initializing model. + set_seed(training_args.seed) + + if data_args.dataset_name == "funsd": + # datasets = load_dataset("nielsr/funsd") + import funsd + + datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir) + else: + raise NotImplementedError() + + column_names = datasets["test"].column_names + features = datasets["test"].features + + text_column_name = "words" if "words" in column_names else "tokens" + boxes_column_name = "bboxes" + + label_column_name = ( + f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] + ) + + remove_columns = column_names + + # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the + # unique labels. + def get_label_list(labels): + unique_labels = set() + for label in labels: + unique_labels = unique_labels | set(label) + label_list = sorted(unique_labels) + return label_list + + if isinstance(features[label_column_name].feature, ClassLabel): + label_list = features[label_column_name].feature.names + # No need to convert the labels since they are already ints. + label_to_id = {i: i for i in range(len(label_list))} + else: + label_list = get_label_list(datasets["train"][label_column_name]) + label_to_id = {l: i for i, l in enumerate(label_list)} + num_labels = len(label_list) + + # Load pretrained model and tokenizer + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. + config = AutoConfig.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + num_labels=num_labels, + finetuning_task=data_args.task_name, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + input_size=data_args.input_size, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + tokenizer_file=None, + # avoid loading from a cached file of the pre-trained model in another + # machine + cache_dir=model_args.cache_dir, + use_fast=True, + add_prefix_space=True, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # Tokenizer check: this script requires a fast tokenizer. 
+ if not isinstance(tokenizer, PreTrainedTokenizerFast): + raise ValueError( + "This example script only works for models that have a fast tokenizer. Checkout the big table of models " + "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " + "requirement" + ) + + # Preprocessing the dataset + # Padding strategy + padding = "max_length" if data_args.pad_to_max_length else False + + if data_args.visual_embed: + imagenet_default_mean_and_std = data_args.imagenet_default_mean_and_std + mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN + std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD + common_transform = Compose( + [ + # transforms.ColorJitter(0.4, 0.4, 0.4), + # transforms.RandomHorizontalFlip(p=0.5), + RandomResizedCropAndInterpolationWithTwoPic( + size=data_args.input_size, + interpolation=data_args.train_interpolation, + ), + ] + ) + + patch_transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize(mean=torch.tensor(mean), std=torch.tensor(std)), + ] + ) + + # Tokenize all texts and align the labels with them. + def tokenize_and_align_labels(examples, augmentation=False): + tokenized_inputs = tokenizer( + examples[text_column_name], + padding=False, + truncation=True, + return_overflowing_tokens=True, + boxes=examples[boxes_column_name], + ) + + labels = [] + bboxes = [] + images = [] + for batch_index in range(len(tokenized_inputs["input_ids"])): + word_ids = tokenized_inputs.word_ids(batch_index=batch_index) + org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] + + label = examples[label_column_name][org_batch_index] + bbox = examples["bboxes"][org_batch_index] + previous_word_idx = None + label_ids = [] + bbox_inputs = [] + for word_idx in word_ids: + # Special tokens have a word id that is None. We set the label to -100 so they are automatically + # ignored in the loss function. + if word_idx is None: + label_ids.append(-100) + bbox_inputs.append([0, 0, 0, 0]) + # We set the label for the first token of each word. + elif word_idx != previous_word_idx: + label_ids.append(label_to_id[label[word_idx]]) + bbox_inputs.append(bbox[word_idx]) + # For the other tokens in a word, we set the label to either the current label or -100, depending on + # the label_all_tokens flag. 
+ else: + label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) + bbox_inputs.append(bbox[word_idx]) + previous_word_idx = word_idx + labels.append(label_ids) + bboxes.append(bbox_inputs) + + if data_args.visual_embed: + ipath = examples["image_path"][org_batch_index] + img = pil_loader(ipath) + for_patches, _ = common_transform(img, augmentation=augmentation) + patch = patch_transform(for_patches) + images.append(patch) + + tokenized_inputs["labels"] = labels + tokenized_inputs["bbox"] = bboxes + if data_args.visual_embed: + tokenized_inputs["images"] = images + + return tokenized_inputs + + validation_name = "test" + if validation_name not in datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_dataset = datasets[validation_name] + if data_args.max_val_samples is not None: + eval_dataset = eval_dataset.select(range(data_args.max_val_samples)) + eval_dataset = eval_dataset.map( + tokenize_and_align_labels, + batched=True, + remove_columns=remove_columns, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) + + # Metrics + metric = load_metric("seqeval") + + def compute_metrics(p): + predictions, labels = p + predictions = np.argmax(predictions, axis=2) + + # Remove ignored index (special tokens) + true_predictions = [ + [label_list[p] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + true_labels = [ + [label_list[l] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + + results = metric.compute(predictions=true_predictions, references=true_labels) + if data_args.return_entity_level_metrics: + # Unpack nested dictionaries + final_results = {} + for key, value in results.items(): + if isinstance(value, dict): + for n, v in value.items(): + final_results[f"{key}_{n}"] = v + else: + final_results[key] = value + return final_results + else: + return { + "precision": results["overall_precision"], + "recall": results["overall_recall"], + "f1": results["overall_f1"], + "accuracy": results["overall_accuracy"], + } + + # Evaluation + from model import ORTModel + + def eval_func(model): + logger.info("*** Evaluate ***") + ort_model = ORTModel( + model, + compute_metrics=compute_metrics, + ) + outputs = ort_model.evaluation_loop(eval_dataset) + return outputs.metrics["f1"] + + if model_args.tune: + from neural_compressor import PostTrainingQuantConfig, quantization + from neural_compressor.utils.constant import FP32 + import onnx + + onnx_model = onnx.load(model_args.input_model) + calib_dataset = IncDataset(eval_dataset, onnx_model) + # TODO double check it for better perf + config = PostTrainingQuantConfig( + approach="dynamic", + quant_level=1, + quant_format=model_args.quant_format, + ) + # recipes={'smooth_quant': True, 'smooth_quant_args': {'alpha': 0.5}}, + # TODO double-check the sq, some issues + q_model = quantization.fit( + onnx_model, + config, + eval_func=eval_func, + calib_dataloader=DataLoader(framework="onnxruntime", dataset=calib_dataset, batch_size=1), + ) + q_model.save(model_args.save_path) + + if model_args.benchmark: + import onnx + onnx_model = onnx.load(model_args.input_model) + if model_args.mode == "performance": + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + + b_dataset = IncDataset(eval_dataset, onnx_model) + conf = BenchmarkConfig( + iteration=100, + cores_per_instance=28, + num_of_instance=1, 
+ ) + b_dataloader = DataLoader( + framework="onnxruntime", + dataset=b_dataset, + batch_size=model_args.batch_size, + ) + fit(onnx_model, conf, b_dataloader=b_dataloader) + elif model_args.mode == "accuracy": + eval_f1 = eval_func(onnx_model) + print("Batch size = %d" % model_args.batch_size) + print("Accuracy: %.5f" % eval_f1) + + +if __name__ == "__main__": + main() diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/model.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/model.py index ed9e4576162..d9e1f919748 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/model.py @@ -1,92 +1,92 @@ -#!/usr/bin/env python -# coding=utf-8 - -import logging -import os -from typing import Callable, Dict, List, Optional, Union - -import numpy as np -from datasets import Dataset -from transformers import EvalPrediction -from transformers.trainer_pt_utils import nested_concat -from transformers.trainer_utils import EvalLoopOutput -import onnxruntime - -logger = logging.getLogger(__name__) - - -class ORTModel: - def __init__( - self, - model: Union[str, os.PathLike], - compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, - label_names: Optional[List[str]] = None, - ): - """ - Args: - model_path (`Union[str, os.PathLike]`): - The path to the model ONNX Intermediate Representation (IR). - execution_provider (:obj:`str`, `optional`): - ONNX Runtime execution provider to use. - compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): - The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and - return a dictionary string to metric values. - label_names (`List[str]`, `optional`): - The list of keys in your dictionary of inputs that correspond to the labels. - """ - self.compute_metrics = compute_metrics - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession( - model.SerializeToString(), providers=onnxruntime.get_available_providers() - ) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - - def evaluation_loop(self, dataset: Dataset): - """ - Run evaluation and returns metrics and predictions. - - Args: - dataset (`datasets.Dataset`): - Dataset to use for the evaluation step. 
- """ - logger.info("***** Running evaluation *****") - all_preds = None - all_labels = None - onnx_inputs = {} - for step, inputs in enumerate(dataset): - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - """ - LayoutLMV2 inputs (with order): - { - 'input_ids': {0: 'batch_size', 1: 'sequence_length'}, - 'bbox': {0: 'batch_size', 1: 'sequence_length'}, - 'image': {0: 'batch_size', 1: 'num_channels'}, # dtype is np.int64 not float - 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, - } - """ - for key in self.onnx_input_names: - if key in inputs: - onnx_inputs[key] = np.array([inputs[key]]) - elif key == "image": - onnx_inputs[key] = np.array([inputs["images"]], dtype=np.int64) - preds = self.session.run(None, onnx_inputs) - if len(preds) == 1: - preds = preds[0] - all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) - all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) - if self.compute_metrics is not None and all_preds is not None and all_labels is not None: - metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) - else: - metrics = {} - return EvalLoopOutput( - predictions=all_preds, - label_ids=all_labels, - metrics=metrics, - num_samples=len(dataset), - ) +#!/usr/bin/env python +# coding=utf-8 + +import logging +import os +from typing import Callable, Dict, List, Optional, Union + +import numpy as np +from datasets import Dataset +from transformers import EvalPrediction +from transformers.trainer_pt_utils import nested_concat +from transformers.trainer_utils import EvalLoopOutput +import onnxruntime + +logger = logging.getLogger(__name__) + + +class ORTModel: + def __init__( + self, + model: Union[str, os.PathLike], + compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, + label_names: Optional[List[str]] = None, + ): + """ + Args: + model_path (`Union[str, os.PathLike]`): + The path to the model ONNX Intermediate Representation (IR). + execution_provider (:obj:`str`, `optional`): + ONNX Runtime execution provider to use. + compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): + The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and + return a dictionary string to metric values. + label_names (`List[str]`, `optional`): + The list of keys in your dictionary of inputs that correspond to the labels. + """ + self.compute_metrics = compute_metrics + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession( + model.SerializeToString(), providers=onnxruntime.get_available_providers() + ) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + + def evaluation_loop(self, dataset: Dataset): + """ + Run evaluation and returns metrics and predictions. + + Args: + dataset (`datasets.Dataset`): + Dataset to use for the evaluation step. 
+ """ + logger.info("***** Running evaluation *****") + all_preds = None + all_labels = None + onnx_inputs = {} + for step, inputs in enumerate(dataset): + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + """ + LayoutLMV2 inputs (with order): + { + 'input_ids': {0: 'batch_size', 1: 'sequence_length'}, + 'bbox': {0: 'batch_size', 1: 'sequence_length'}, + 'image': {0: 'batch_size', 1: 'num_channels'}, # dtype is np.int64 not float + 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, + } + """ + for key in self.onnx_input_names: + if key in inputs: + onnx_inputs[key] = np.array([inputs[key]]) + elif key == "image": + onnx_inputs[key] = np.array([inputs["images"]], dtype=np.int64) + preds = self.session.run(None, onnx_inputs) + if len(preds) == 1: + preds = preds[0] + all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) + all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) + if self.compute_metrics is not None and all_preds is not None and all_labels is not None: + metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) + else: + metrics = {} + return EvalLoopOutput( + predictions=all_preds, + label_ids=all_labels, + metrics=metrics, + num_samples=len(dataset), + ) diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/requirements.txt similarity index 94% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/requirements.txt index 0058f979157..55309000436 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/requirements.txt @@ -1,21 +1,21 @@ -accelerate -datasets -transformers -seqeval -tensorboard -sentencepiece -timm -Pillow -einops -textdistance -shapely -protobuf==3.20.0 -torch -torchvision -setuptools -onnx -onnxruntime -onnxruntime-extensions; python_version < '3.11' -pytesseract -#detectron2 # TODO export require it, resolve it later +accelerate +datasets +transformers +seqeval +tensorboard +sentencepiece +timm +Pillow +einops +textdistance +shapely +protobuf==3.20.0 +torch +torchvision +setuptools +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.11' +pytesseract +#detectron2 # TODO export require it, resolve it later # git+https://github.com/facebookresearch/detectron2.git \ No newline at end of file 
diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/funsd.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/funsd.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/funsd.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/funsd.py index 6e9aa23e8c0..9f8af99d49c 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/funsd.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/funsd.py @@ -1,135 +1,135 @@ -# coding=utf-8 -# Adapted from https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py - -import json -import os - -import datasets - -from PIL import Image -import numpy as np - -logger = datasets.logging.get_logger(__name__) - - -_CITATION = """\ -@article{Jaume2019FUNSDAD, - title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, - author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, - journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, - year={2019}, - volume={2}, - pages={1-6} -} -""" -_DESCRIPTION = """\ -https://guillaumejaume.github.io/FUNSD/ -""" - - -def load_image(image_path): - image = Image.open(image_path).convert("RGB") - w, h = image.size - return image, (w, h) - - -def normalize_bbox(bbox, size): - return [ - int(1000 * bbox[0] / size[0]), - int(1000 * bbox[1] / size[1]), - int(1000 * bbox[2] / size[0]), - int(1000 * bbox[3] / size[1]), - ] - - -class FunsdConfig(datasets.BuilderConfig): - """BuilderConfig for FUNSD""" - - def __init__(self, **kwargs): - """BuilderConfig for FUNSD. - - Args: - **kwargs: keyword arguments forwarded to super. 
- """ - super(FunsdConfig, self).__init__(**kwargs) - - -class Funsd(datasets.GeneratorBasedBuilder): - """FUNSD dataset.""" - - BUILDER_CONFIGS = [ - FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "words": datasets.Sequence(datasets.Value("string")), - "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] - ) - ), - "image_path": datasets.Value("string"), - } - ), - supervised_keys=None, - homepage="https://guillaumejaume.github.io/FUNSD/", - citation=_CITATION, - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} - ), - ] - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - ann_dir = os.path.join(filepath, "annotations") - img_dir = os.path.join(filepath, "images") - for guid, file in enumerate(sorted(os.listdir(ann_dir))): - words = [] - bboxes = [] - ner_tags = [] - file_path = os.path.join(ann_dir, file) - with open(file_path, "r", encoding="utf8") as f: - data = json.load(f) - image_path = os.path.join(img_dir, file) - image_path = image_path.replace("json", "png") - image, size = load_image(image_path) - for item in data["form"]: - words_example, label = item["words"], item["label"] - words_example = [w for w in words_example if w["text"].strip() != ""] - if len(words_example) == 0: - continue - if label == "other": - for w in words_example: - words.append(w["text"]) - ner_tags.append("O") - bboxes.append(normalize_bbox(w["box"], size)) - else: - words.append(words_example[0]["text"]) - ner_tags.append("B-" + label.upper()) - bboxes.append(normalize_bbox(words_example[0]["box"], size)) - for w in words_example[1:]: - words.append(w["text"]) - ner_tags.append("I-" + label.upper()) - bboxes.append(normalize_bbox(w["box"], size)) - yield guid, { - "id": str(guid), - "words": words, - "bboxes": bboxes, - "ner_tags": ner_tags, - "image_path": image_path, - } +# coding=utf-8 +# Adapted from https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py + +import json +import os + +import datasets + +from PIL import Image +import numpy as np + +logger = datasets.logging.get_logger(__name__) + + +_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. 
Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +def load_image(image_path): + image = Image.open(image_path).convert("RGB") + w, h = image.size + return image, (w, h) + + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. + """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """FUNSD dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "words": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image_path": datasets.Value("string"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + words = [] + bboxes = [] + ner_tags = [] + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + words_example, label = item["words"], item["label"] + words_example = [w for w in words_example if w["text"].strip() != ""] + if len(words_example) == 0: + continue + if label == "other": + for w in words_example: + words.append(w["text"]) + ner_tags.append("O") + bboxes.append(normalize_bbox(w["box"], size)) + else: + words.append(words_example[0]["text"]) + ner_tags.append("B-" + label.upper()) + bboxes.append(normalize_bbox(words_example[0]["box"], size)) + for w in words_example[1:]: + words.append(w["text"]) + ner_tags.append("I-" + label.upper()) + bboxes.append(normalize_bbox(w["box"], size)) + yield guid, { + "id": str(guid), + "words": words, + "bboxes": bboxes, + "ner_tags": ner_tags, + "image_path": image_path, + } diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/image_utils.py 
b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/image_utils.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/image_utils.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/image_utils.py index f9d39843faa..6bbed4f933b 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/image_utils.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/image_utils.py @@ -1,272 +1,272 @@ -# Adapted from https://github.com/microsoft/unilm/blob/master/layoutlmft/layoutlmft/data/utils.py - -import torchvision.transforms.functional as F -import warnings -import math -import random -import numpy as np -from PIL import Image -import torch - - -def crop(image, i, j, h, w, boxes=None): - cropped_image = F.crop(image, i, j, h, w) - - if boxes is not None: - # Currently we cannot use this case since when some boxes is out of the cropped image, - # it may be better to drop out these boxes along with their text input (instead of min or clamp) - # which haven't been implemented here - max_size = torch.as_tensor([w, h], dtype=torch.float32) - cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) - cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) - cropped_boxes = cropped_boxes.clamp(min=0) - boxes = cropped_boxes.reshape(-1, 4) - - return cropped_image, boxes - - -def resize(image, size, interpolation, boxes=None): - # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, - # which is compatible with a square image size of 224x224 - rescaled_image = F.resize(image, size, interpolation) - - if boxes is None: - return rescaled_image, None - - ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) - ratio_width, ratio_height = ratios - - # boxes = boxes.copy() - scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) - - return rescaled_image, scaled_boxes - - -def clamp(num, min_value, max_value): - return max(min(num, max_value), min_value) - - -def get_bb(bb, page_size): - bbs = [float(j) for j in bb] - xs, ys = [], [] - for i, b in enumerate(bbs): - if i % 2 == 0: - xs.append(b) - else: - ys.append(b) - (width, height) = page_size - return_bb = [ - clamp(min(xs), 0, width - 1), - clamp(min(ys), 0, height - 1), - clamp(max(xs), 0, width - 1), - clamp(max(ys), 0, height - 1), - ] - return_bb = [ - int(1000 * return_bb[0] / width), - int(1000 * return_bb[1] / height), - int(1000 * return_bb[2] / width), - int(1000 * return_bb[3] / height), - ] - return return_bb - - -class ToNumpy: - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return np_img - - -class ToTensor: - def __init__(self, dtype=torch.float32): - self.dtype = dtype - - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return torch.from_numpy(np_img).to(dtype=self.dtype) - - -_pil_interpolation_to_str = { - F.InterpolationMode.NEAREST: "F.InterpolationMode.NEAREST", - F.InterpolationMode.BILINEAR: 
"F.InterpolationMode.BILINEAR", - F.InterpolationMode.BICUBIC: "F.InterpolationMode.BICUBIC", - F.InterpolationMode.LANCZOS: "F.InterpolationMode.LANCZOS", - F.InterpolationMode.HAMMING: "F.InterpolationMode.HAMMING", - F.InterpolationMode.BOX: "F.InterpolationMode.BOX", -} - - -def _pil_interp(method): - if method == "bicubic": - return F.InterpolationMode.BICUBIC - elif method == "lanczos": - return F.InterpolationMode.LANCZOS - elif method == "hamming": - return F.InterpolationMode.HAMMING - else: - # default bilinear, do we want to allow nearest? - return F.InterpolationMode.BILINEAR - - -class Compose: - """Composes several transforms together. This transform does not support torchscript. - Please, see the note below. - - Args: - transforms (list of ``Transform`` objects): list of transforms to compose. - - Example: - >>> transforms.Compose([ - >>> transforms.CenterCrop(10), - >>> transforms.PILToTensor(), - >>> transforms.ConvertImageDtype(torch.float), - >>> ]) - - .. note:: - In order to script the transformations, please use ``torch.nn.Sequential`` as below. - - >>> transforms = torch.nn.Sequential( - >>> transforms.CenterCrop(10), - >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - >>> ) - >>> scripted_transforms = torch.jit.script(transforms) - - Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require - `lambda` functions or ``PIL.Image``. - - """ - - def __init__(self, transforms): - self.transforms = transforms - - def __call__(self, img, augmentation=False, box=None): - for t in self.transforms: - img = t(img, augmentation, box) - return img - - -class RandomResizedCropAndInterpolationWithTwoPic: - """Crop the given PIL Image to random size and aspect ratio with random interpolation. - A crop of random size (default: of 0.08 to 1.0) of the original size and a random - aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop - is finally resized to given size. - This is popularly used to train the Inception networks. - Args: - size: expected output size of each edge - scale: range of size of the origin size cropped - ratio: range of aspect ratio of the origin aspect ratio cropped - interpolation: Default: PIL.Image.BILINEAR - """ - - def __init__( - self, - size, - second_size=None, - scale=(0.08, 1.0), - ratio=(3.0 / 4.0, 4.0 / 3.0), - interpolation="bilinear", - second_interpolation="lanczos", - ): - if isinstance(size, tuple): - self.size = size - else: - self.size = (size, size) - if second_size is not None: - if isinstance(second_size, tuple): - self.second_size = second_size - else: - self.second_size = (second_size, second_size) - else: - self.second_size = None - if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): - warnings.warn("range should be of kind (min, max)") - - self.interpolation = _pil_interp(interpolation) - self.second_interpolation = _pil_interp(second_interpolation) - self.scale = scale - self.ratio = ratio - - @staticmethod - def get_params(img, scale, ratio): - """Get parameters for ``crop`` for a random sized crop. - Args: - img (PIL Image): Image to be cropped. - scale (tuple): range of size of the origin size cropped - ratio (tuple): range of aspect ratio of the origin aspect ratio cropped - Returns: - tuple: params (i, j, h, w) to be passed to ``crop`` for a random - sized crop. 
- """ - area = img.size[0] * img.size[1] - - for attempt in range(10): - target_area = random.uniform(*scale) * area - log_ratio = (math.log(ratio[0]), math.log(ratio[1])) - aspect_ratio = math.exp(random.uniform(*log_ratio)) - - w = int(round(math.sqrt(target_area * aspect_ratio))) - h = int(round(math.sqrt(target_area / aspect_ratio))) - - if w <= img.size[0] and h <= img.size[1]: - i = random.randint(0, img.size[1] - h) - j = random.randint(0, img.size[0] - w) - return i, j, h, w - - # Fallback to central crop - in_ratio = img.size[0] / img.size[1] - if in_ratio < min(ratio): - w = img.size[0] - h = int(round(w / min(ratio))) - elif in_ratio > max(ratio): - h = img.size[1] - w = int(round(h * max(ratio))) - else: # whole image - w = img.size[0] - h = img.size[1] - i = (img.size[1] - h) // 2 - j = (img.size[0] - w) // 2 - return i, j, h, w - - def __call__(self, img, augmentation=False, box=None): - """ - Args: - img (PIL Image): Image to be cropped and resized. - Returns: - PIL Image: Randomly cropped and resized image. - """ - if augmentation: - i, j, h, w = self.get_params(img, self.scale, self.ratio) - img = F.crop(img, i, j, h, w) - # img, box = crop(img, i, j, h, w, box) - img = F.resize(img, self.size, self.interpolation) - second_img = ( - F.resize(img, self.second_size, self.second_interpolation) if self.second_size is not None else None - ) - return img, second_img - - def __repr__(self): - if isinstance(self.interpolation, (tuple, list)): - interpolate_str = " ".join([_pil_interpolation_to_str[x] for x in self.interpolation]) - else: - interpolate_str = _pil_interpolation_to_str[self.interpolation] - format_string = self.__class__.__name__ + "(size={0}".format(self.size) - format_string += ", scale={0}".format(tuple(round(s, 4) for s in self.scale)) - format_string += ", ratio={0}".format(tuple(round(r, 4) for r in self.ratio)) - format_string += ", interpolation={0}".format(interpolate_str) - if self.second_size is not None: - format_string += ", second_size={0}".format(self.second_size) - format_string += ", second_interpolation={0}".format(_pil_interpolation_to_str[self.second_interpolation]) - format_string += ")" - return format_string - - -def pil_loader(path: str) -> Image.Image: - # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) - with open(path, "rb") as f: - img = Image.open(f) - return img.convert("RGB") +# Adapted from https://github.com/microsoft/unilm/blob/master/layoutlmft/layoutlmft/data/utils.py + +import torchvision.transforms.functional as F +import warnings +import math +import random +import numpy as np +from PIL import Image +import torch + + +def crop(image, i, j, h, w, boxes=None): + cropped_image = F.crop(image, i, j, h, w) + + if boxes is not None: + # Currently we cannot use this case since when some boxes is out of the cropped image, + # it may be better to drop out these boxes along with their text input (instead of min or clamp) + # which haven't been implemented here + max_size = torch.as_tensor([w, h], dtype=torch.float32) + cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) + cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) + cropped_boxes = cropped_boxes.clamp(min=0) + boxes = cropped_boxes.reshape(-1, 4) + + return cropped_image, boxes + + +def resize(image, size, interpolation, boxes=None): + # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, + # which is compatible with a square image size of 
224x224 + rescaled_image = F.resize(image, size, interpolation) + + if boxes is None: + return rescaled_image, None + + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) + ratio_width, ratio_height = ratios + + # boxes = boxes.copy() + scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) + + return rescaled_image, scaled_boxes + + +def clamp(num, min_value, max_value): + return max(min(num, max_value), min_value) + + +def get_bb(bb, page_size): + bbs = [float(j) for j in bb] + xs, ys = [], [] + for i, b in enumerate(bbs): + if i % 2 == 0: + xs.append(b) + else: + ys.append(b) + (width, height) = page_size + return_bb = [ + clamp(min(xs), 0, width - 1), + clamp(min(ys), 0, height - 1), + clamp(max(xs), 0, width - 1), + clamp(max(ys), 0, height - 1), + ] + return_bb = [ + int(1000 * return_bb[0] / width), + int(1000 * return_bb[1] / height), + int(1000 * return_bb[2] / width), + int(1000 * return_bb[3] / height), + ] + return return_bb + + +class ToNumpy: + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return np_img + + +class ToTensor: + def __init__(self, dtype=torch.float32): + self.dtype = dtype + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return torch.from_numpy(np_img).to(dtype=self.dtype) + + +_pil_interpolation_to_str = { + F.InterpolationMode.NEAREST: "F.InterpolationMode.NEAREST", + F.InterpolationMode.BILINEAR: "F.InterpolationMode.BILINEAR", + F.InterpolationMode.BICUBIC: "F.InterpolationMode.BICUBIC", + F.InterpolationMode.LANCZOS: "F.InterpolationMode.LANCZOS", + F.InterpolationMode.HAMMING: "F.InterpolationMode.HAMMING", + F.InterpolationMode.BOX: "F.InterpolationMode.BOX", +} + + +def _pil_interp(method): + if method == "bicubic": + return F.InterpolationMode.BICUBIC + elif method == "lanczos": + return F.InterpolationMode.LANCZOS + elif method == "hamming": + return F.InterpolationMode.HAMMING + else: + # default bilinear, do we want to allow nearest? + return F.InterpolationMode.BILINEAR + + +class Compose: + """Composes several transforms together. This transform does not support torchscript. + Please, see the note below. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> transforms.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.PILToTensor(), + >>> transforms.ConvertImageDtype(torch.float), + >>> ]) + + .. note:: + In order to script the transformations, please use ``torch.nn.Sequential`` as below. + + >>> transforms = torch.nn.Sequential( + >>> transforms.CenterCrop(10), + >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + >>> ) + >>> scripted_transforms = torch.jit.script(transforms) + + Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require + `lambda` functions or ``PIL.Image``. + + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, augmentation=False, box=None): + for t in self.transforms: + img = t(img, augmentation, box) + return img + + +class RandomResizedCropAndInterpolationWithTwoPic: + """Crop the given PIL Image to random size and aspect ratio with random interpolation. 
+ A crop of random size (default: of 0.08 to 1.0) of the original size and a random + aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop + is finally resized to given size. + This is popularly used to train the Inception networks. + Args: + size: expected output size of each edge + scale: range of size of the origin size cropped + ratio: range of aspect ratio of the origin aspect ratio cropped + interpolation: Default: PIL.Image.BILINEAR + """ + + def __init__( + self, + size, + second_size=None, + scale=(0.08, 1.0), + ratio=(3.0 / 4.0, 4.0 / 3.0), + interpolation="bilinear", + second_interpolation="lanczos", + ): + if isinstance(size, tuple): + self.size = size + else: + self.size = (size, size) + if second_size is not None: + if isinstance(second_size, tuple): + self.second_size = second_size + else: + self.second_size = (second_size, second_size) + else: + self.second_size = None + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("range should be of kind (min, max)") + + self.interpolation = _pil_interp(interpolation) + self.second_interpolation = _pil_interp(second_interpolation) + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img, scale, ratio): + """Get parameters for ``crop`` for a random sized crop. + Args: + img (PIL Image): Image to be cropped. + scale (tuple): range of size of the origin size cropped + ratio (tuple): range of aspect ratio of the origin aspect ratio cropped + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. + """ + area = img.size[0] * img.size[1] + + for attempt in range(10): + target_area = random.uniform(*scale) * area + log_ratio = (math.log(ratio[0]), math.log(ratio[1])) + aspect_ratio = math.exp(random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if w <= img.size[0] and h <= img.size[1]: + i = random.randint(0, img.size[1] - h) + j = random.randint(0, img.size[0] - w) + return i, j, h, w + + # Fallback to central crop + in_ratio = img.size[0] / img.size[1] + if in_ratio < min(ratio): + w = img.size[0] + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = img.size[1] + w = int(round(h * max(ratio))) + else: # whole image + w = img.size[0] + h = img.size[1] + i = (img.size[1] - h) // 2 + j = (img.size[0] - w) // 2 + return i, j, h, w + + def __call__(self, img, augmentation=False, box=None): + """ + Args: + img (PIL Image): Image to be cropped and resized. + Returns: + PIL Image: Randomly cropped and resized image. 
+ """ + if augmentation: + i, j, h, w = self.get_params(img, self.scale, self.ratio) + img = F.crop(img, i, j, h, w) + # img, box = crop(img, i, j, h, w, box) + img = F.resize(img, self.size, self.interpolation) + second_img = ( + F.resize(img, self.second_size, self.second_interpolation) if self.second_size is not None else None + ) + return img, second_img + + def __repr__(self): + if isinstance(self.interpolation, (tuple, list)): + interpolate_str = " ".join([_pil_interpolation_to_str[x] for x in self.interpolation]) + else: + interpolate_str = _pil_interpolation_to_str[self.interpolation] + format_string = self.__class__.__name__ + "(size={0}".format(self.size) + format_string += ", scale={0}".format(tuple(round(s, 4) for s in self.scale)) + format_string += ", ratio={0}".format(tuple(round(r, 4) for r in self.ratio)) + format_string += ", interpolation={0}".format(interpolate_str) + if self.second_size is not None: + format_string += ", second_size={0}".format(self.second_size) + format_string += ", second_interpolation={0}".format(_pil_interpolation_to_str[self.second_interpolation]) + format_string += ")" + return format_string + + +def pil_loader(path: str) -> Image.Image: + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, "rb") as f: + img = Image.open(f) + return img.convert("RGB") diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py index 5540f4c002d..759b9bdc88c 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/main.py @@ -1,566 +1,566 @@ -#!/usr/bin/env python -# coding=utf-8 -from image_utils import ( - Compose, - RandomResizedCropAndInterpolationWithTwoPic, - pil_loader, -) -from torchvision import transforms -from timm.data.constants import ( - IMAGENET_DEFAULT_MEAN, - IMAGENET_DEFAULT_STD, - IMAGENET_INCEPTION_MEAN, - IMAGENET_INCEPTION_STD, -) -from neural_compressor.data import DataLoader -import torch -import onnxruntime -import onnx -import logging -import os -import sys -from dataclasses import dataclass, field -from typing import Optional - -import numpy as np -import transformers -from datasets import ClassLabel, load_dataset, load_metric -from transformers import ( - AutoConfig, - AutoModelForTokenClassification, - AutoTokenizer, - HfArgumentParser, - PreTrainedTokenizerFast, - Trainer, - TrainingArguments, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version - -# Will error if the minimal version of Transformers is not installed. -# Remove at your own risks. -check_min_version("4.5.0") - - -logger = logging.getLogger(__name__) - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
- """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, - metadata={"help": "Pretrained config name or path if not the same as model_name"}, - ) - tokenizer_name: Optional[str] = field( - default=None, - metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}, - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." - }, - ) - input_model: str = field(default=None, metadata={"help": "Path to onnx model"}) - tune: bool = field( - default=False, - metadata={"help": ("INC tune")}, - ) - benchmark: bool = field( - default=False, - metadata={"help": ("INC benchmark")}, - ) - mode: str = field( - default="performance", - metadata={"help": ("INC benchmark mode")}, - ) - save_path: str = field( - default=None, - metadata={"help": ("onnx int8 model path")}, - ) - batch_size: int = field( - default=1, - metadata={"help": ("batch size for benchmark")}, - ) - quant_format: str = field( - default="QOperator", - metadata={"help": ("quant format")}, - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - """ - - task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) - dataset_name: Optional[str] = field( - default="funsd", - metadata={"help": "The name of the dataset to use (via the datasets library)."}, - ) - dataset_config_name: Optional[str] = field( - default=None, - metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}, - ) - train_file: Optional[str] = field( - default=None, - metadata={"help": "The input training data file (a csv or JSON file)."}, - ) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, - ) - overwrite_cache: bool = field( - default=False, - metadata={"help": "Overwrite the cached training and evaluation sets"}, - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." 
- }, - ) - max_val_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." - }, - ) - max_test_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of test examples to this " - "value if set." - }, - ) - label_all_tokens: bool = field( - default=False, - metadata={ - "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " - "one (in which case the other tokens will have a padding index)." - }, - ) - return_entity_level_metrics: bool = field( - default=False, - metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, - ) - segment_level_layout: bool = field(default=True) - visual_embed: bool = field(default=True) - data_dir: Optional[str] = field(default=None) - input_size: int = field(default=224, metadata={"help": "images input size for backbone"}) - second_input_size: int = field(default=112, metadata={"help": "images input size for discrete vae"}) - train_interpolation: str = field( - default="bicubic", - metadata={"help": "Training interpolation (random, bilinear, bicubic)"}, - ) - second_interpolation: str = field( - default="lanczos", - metadata={"help": "Interpolation for discrete vae (random, bilinear, bicubic)"}, - ) - imagenet_default_mean_and_std: bool = field(default=False, metadata={"help": ""}) - - -class IncDataset: - def __init__( - self, - dataset, - model, - label_names=None, - ): - self.dataset = dataset - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession( - model.SerializeToString(), - providers=onnxruntime.get_available_providers(), - ) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self._process_dataset() - - def _process_dataset(self): - self.label = [] - self.onnx_inputs = [] - for inputs in self.dataset: - onnx_inputs = [] - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - self.label.append(labels) - """ - LayoutLMV2 inputs (with order): - { - 'input_ids': {0: 'batch_size', 1: 'sequence_length'}, - 'bbox': {0: 'batch_size', 1: 'sequence_length'}, - 'image': {0: 'batch_size', 1: 'num_channels'}, # dtype is np.int64 not float - 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, - } - """ - for key in self.onnx_input_names: - if key in inputs: - # onnx_inputs[key] = np.array([inputs[key]]) - onnx_inputs.append(np.array(inputs[key])) - # self.onnx_inputs.append([np.array(inputs[key])]) - elif key == "image": - # onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) - onnx_inputs.append(np.array(inputs["images"], dtype=np.int64)) - # self.onnx_inputs.append([np.array(inputs['images'], dtype=np.float32)]) - - self.onnx_inputs.append(onnx_inputs) - onnx_inputs = [] - - def __getitem__(self, index): - return tuple(self.onnx_inputs[index]), self.label[index] - - def __len__(self): - assert len(self.label) == len(self.onnx_inputs) - return len(self.onnx_inputs) - - -def main(): - # See all possible arguments in layoutlmft/transformers/training_args.py - # or by passing the --help flag to this script. 
- # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. - model_args, data_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - ( - model_args, - data_args, - training_args, - ) = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - # Set the verbosity to info of the Transformers logger (on main process only): - if is_main_process(training_args.local_rank): - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - logger.info(f"Training/evaluation parameters {training_args}") - - # Set seed before initializing model. - set_seed(training_args.seed) - - if data_args.dataset_name == "funsd": - # datasets = load_dataset("nielsr/funsd") - import funsd - - datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir) - else: - raise NotImplementedError() - - column_names = datasets["test"].column_names - features = datasets["test"].features - - text_column_name = "words" if "words" in column_names else "tokens" - boxes_column_name = "bboxes" - - label_column_name = ( - f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] - ) - - remove_columns = column_names - - # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the - # unique labels. - def get_label_list(labels): - unique_labels = set() - for label in labels: - unique_labels = unique_labels | set(label) - label_list = sorted(unique_labels) - return label_list - - if isinstance(features[label_column_name].feature, ClassLabel): - label_list = features[label_column_name].feature.names - # No need to convert the labels since they are already ints. - label_to_id = {i: i for i in range(len(label_list))} - else: - label_list = get_label_list(datasets["train"][label_column_name]) - label_to_id = {l: i for i, l in enumerate(label_list)} - num_labels = len(label_list) - - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. 
- config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - num_labels=num_labels, - finetuning_task=data_args.task_name, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - input_size=data_args.input_size, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - tokenizer_file=None, - # avoid loading from a cached file of the pre-trained model in another - # machine - cache_dir=model_args.cache_dir, - use_fast=True, - add_prefix_space=True, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - - # Tokenizer check: this script requires a fast tokenizer. - if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " - "requirement" - ) - - # Preprocessing the dataset - # Padding strategy - padding = "max_length" if data_args.pad_to_max_length else False - - if data_args.visual_embed: - imagenet_default_mean_and_std = data_args.imagenet_default_mean_and_std - mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN - std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD - common_transform = Compose( - [ - # transforms.ColorJitter(0.4, 0.4, 0.4), - # transforms.RandomHorizontalFlip(p=0.5), - RandomResizedCropAndInterpolationWithTwoPic( - size=data_args.input_size, - interpolation=data_args.train_interpolation, - ), - ] - ) - - patch_transform = transforms.Compose( - [ - transforms.ToTensor(), - transforms.Normalize(mean=torch.tensor(mean), std=torch.tensor(std)), - ] - ) - - # Tokenize all texts and align the labels with them. - def tokenize_and_align_labels(examples, augmentation=False): - tokenized_inputs = tokenizer( - examples[text_column_name], - padding=False, - truncation=True, - return_overflowing_tokens=True, - boxes=examples[boxes_column_name], - ) - - labels = [] - bboxes = [] - images = [] - for batch_index in range(len(tokenized_inputs["input_ids"])): - word_ids = tokenized_inputs.word_ids(batch_index=batch_index) - org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] - - label = examples[label_column_name][org_batch_index] - bbox = examples["bboxes"][org_batch_index] - previous_word_idx = None - label_ids = [] - bbox_inputs = [] - for word_idx in word_ids: - # Special tokens have a word id that is None. We set the label to -100 so they are automatically - # ignored in the loss function. - if word_idx is None: - label_ids.append(-100) - bbox_inputs.append([0, 0, 0, 0]) - # We set the label for the first token of each word. - elif word_idx != previous_word_idx: - label_ids.append(label_to_id[label[word_idx]]) - bbox_inputs.append(bbox[word_idx]) - # For the other tokens in a word, we set the label to either the current label or -100, depending on - # the label_all_tokens flag. 
- else: - label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) - bbox_inputs.append(bbox[word_idx]) - previous_word_idx = word_idx - labels.append(label_ids) - bboxes.append(bbox_inputs) - - if data_args.visual_embed: - ipath = examples["image_path"][org_batch_index] - img = pil_loader(ipath) - for_patches, _ = common_transform(img, augmentation=augmentation) - patch = patch_transform(for_patches) - images.append(patch) - - tokenized_inputs["labels"] = labels - tokenized_inputs["bbox"] = bboxes - if data_args.visual_embed: - tokenized_inputs["images"] = images - - return tokenized_inputs - - validation_name = "test" - if validation_name not in datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_dataset = datasets[validation_name] - if data_args.max_val_samples is not None: - eval_dataset = eval_dataset.select(range(data_args.max_val_samples)) - eval_dataset = eval_dataset.map( - tokenize_and_align_labels, - batched=True, - remove_columns=remove_columns, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) - - # Metrics - metric = load_metric("seqeval") - - def compute_metrics(p): - predictions, labels = p - predictions = np.argmax(predictions, axis=2) - - # Remove ignored index (special tokens) - true_predictions = [ - [label_list[p] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - true_labels = [ - [label_list[l] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - - results = metric.compute(predictions=true_predictions, references=true_labels) - if data_args.return_entity_level_metrics: - # Unpack nested dictionaries - final_results = {} - for key, value in results.items(): - if isinstance(value, dict): - for n, v in value.items(): - final_results[f"{key}_{n}"] = v - else: - final_results[key] = value - return final_results - else: - return { - "precision": results["overall_precision"], - "recall": results["overall_recall"], - "f1": results["overall_f1"], - "accuracy": results["overall_accuracy"], - } - - # Evaluation - from model import ORTModel - - def eval_func(model): - logger.info("*** Evaluate ***") - ort_model = ORTModel( - model, - compute_metrics=compute_metrics, - ) - outputs = ort_model.evaluation_loop(eval_dataset) - return outputs.metrics["f1"] - - if model_args.tune: - from neural_compressor import PostTrainingQuantConfig, quantization - from neural_compressor.utils.constant import FP32 - import onnx - - onnx_model = onnx.load(model_args.input_model) - calib_dataset = IncDataset(eval_dataset, onnx_model) - # TODO double check it for better perf - fp32_op_names = [ - "/layoutlmv2/encoder/layer.\\d+/output/dense/MatMul", - ] - config = PostTrainingQuantConfig( - approach="static", - quant_level=1, - quant_format=model_args.quant_format, - op_name_dict={op_name: FP32 for op_name in fp32_op_names}, - ) - # recipes={'smooth_quant': True, 'smooth_quant_args': {'alpha': 0.5}}, - # TODO double-check the sq, some issues - q_model = quantization.fit( - onnx_model, - config, - eval_func=eval_func, - calib_dataloader=DataLoader(framework="onnxruntime", dataset=calib_dataset, batch_size=1), - ) - q_model.save(model_args.save_path) - - if model_args.benchmark: - import onnx - onnx_model = onnx.load(model_args.input_model) - if model_args.mode == "performance": - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - 
- b_dataset = IncDataset(eval_dataset, onnx_model) - conf = BenchmarkConfig( - iteration=100, - cores_per_instance=28, - num_of_instance=1, - ) - b_dataloader = DataLoader( - framework="onnxruntime", - dataset=b_dataset, - batch_size=model_args.batch_size, - ) - fit(onnx_model, conf, b_dataloader=b_dataloader) - elif model_args.mode == "accuracy": - eval_f1 = eval_func(onnx_model) - print("Batch size = %d" % model_args.batch_size) - print("Accuracy: %.5f" % eval_f1) - - -if __name__ == "__main__": - main() +#!/usr/bin/env python +# coding=utf-8 +from image_utils import ( + Compose, + RandomResizedCropAndInterpolationWithTwoPic, + pil_loader, +) +from torchvision import transforms +from timm.data.constants import ( + IMAGENET_DEFAULT_MEAN, + IMAGENET_DEFAULT_STD, + IMAGENET_INCEPTION_MEAN, + IMAGENET_INCEPTION_STD, +) +from neural_compressor.data import DataLoader +import torch +import onnxruntime +import onnx +import logging +import os +import sys +from dataclasses import dataclass, field +from typing import Optional + +import numpy as np +import transformers +from datasets import ClassLabel, load_dataset, load_metric +from transformers import ( + AutoConfig, + AutoModelForTokenClassification, + AutoTokenizer, + HfArgumentParser, + PreTrainedTokenizerFast, + Trainer, + TrainingArguments, + set_seed, +) +from transformers.trainer_utils import get_last_checkpoint, is_main_process +from transformers.utils import check_min_version + +# Will error if the minimal version of Transformers is not installed. +# Remove at your own risks. +check_min_version("4.5.0") + + +logger = logging.getLogger(__name__) + + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, + metadata={"help": "Pretrained config name or path if not the same as model_name"}, + ) + tokenizer_name: Optional[str] = field( + default=None, + metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"}, + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + input_model: str = field(default=None, metadata={"help": "Path to onnx model"}) + tune: bool = field( + default=False, + metadata={"help": ("INC tune")}, + ) + benchmark: bool = field( + default=False, + metadata={"help": ("INC benchmark")}, + ) + mode: str = field( + default="performance", + metadata={"help": ("INC benchmark mode")}, + ) + save_path: str = field( + default=None, + metadata={"help": ("onnx int8 model path")}, + ) + batch_size: int = field( + default=1, + metadata={"help": ("batch size for benchmark")}, + ) + quant_format: str = field( + default="QOperator", + metadata={"help": ("quant format")}, + ) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) + dataset_name: Optional[str] = field( + default="funsd", + metadata={"help": "The name of the dataset to use (via the datasets library)."}, + ) + dataset_config_name: Optional[str] = field( + default=None, + metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}, + ) + train_file: Optional[str] = field( + default=None, + metadata={"help": "The input training data file (a csv or JSON file)."}, + ) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, + ) + overwrite_cache: bool = field( + default=False, + metadata={"help": "Overwrite the cached training and evaluation sets"}, + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_val_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + label_all_tokens: bool = field( + default=False, + metadata={ + "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." 
+ }, + ) + return_entity_level_metrics: bool = field( + default=False, + metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, + ) + segment_level_layout: bool = field(default=True) + visual_embed: bool = field(default=True) + data_dir: Optional[str] = field(default=None) + input_size: int = field(default=224, metadata={"help": "images input size for backbone"}) + second_input_size: int = field(default=112, metadata={"help": "images input size for discrete vae"}) + train_interpolation: str = field( + default="bicubic", + metadata={"help": "Training interpolation (random, bilinear, bicubic)"}, + ) + second_interpolation: str = field( + default="lanczos", + metadata={"help": "Interpolation for discrete vae (random, bilinear, bicubic)"}, + ) + imagenet_default_mean_and_std: bool = field(default=False, metadata={"help": ""}) + + +class IncDataset: + def __init__( + self, + dataset, + model, + label_names=None, + ): + self.dataset = dataset + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession( + model.SerializeToString(), + providers=onnxruntime.get_available_providers(), + ) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + self._process_dataset() + + def _process_dataset(self): + self.label = [] + self.onnx_inputs = [] + for inputs in self.dataset: + onnx_inputs = [] + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + self.label.append(labels) + """ + LayoutLMV2 inputs (with order): + { + 'input_ids': {0: 'batch_size', 1: 'sequence_length'}, + 'bbox': {0: 'batch_size', 1: 'sequence_length'}, + 'image': {0: 'batch_size', 1: 'num_channels'}, # dtype is np.int64 not float + 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, + } + """ + for key in self.onnx_input_names: + if key in inputs: + # onnx_inputs[key] = np.array([inputs[key]]) + onnx_inputs.append(np.array(inputs[key])) + # self.onnx_inputs.append([np.array(inputs[key])]) + elif key == "image": + # onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) + onnx_inputs.append(np.array(inputs["images"], dtype=np.int64)) + # self.onnx_inputs.append([np.array(inputs['images'], dtype=np.float32)]) + + self.onnx_inputs.append(onnx_inputs) + onnx_inputs = [] + + def __getitem__(self, index): + return tuple(self.onnx_inputs[index]), self.label[index] + + def __len__(self): + assert len(self.label) == len(self.onnx_inputs) + return len(self.onnx_inputs) + + +def main(): + # See all possible arguments in layoutlmft/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
+        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+    else:
+        (
+            model_args,
+            data_args,
+            training_args,
+        ) = parser.parse_args_into_dataclasses()
+
+    # Setup logging
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
+
+    # Log on each process the small summary:
+    logger.warning(
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
+        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+    )
+    # Set the verbosity to info of the Transformers logger (on main process only):
+    if is_main_process(training_args.local_rank):
+        transformers.utils.logging.set_verbosity_info()
+        transformers.utils.logging.enable_default_handler()
+        transformers.utils.logging.enable_explicit_format()
+    logger.info(f"Training/evaluation parameters {training_args}")
+
+    # Set seed before initializing model.
+    set_seed(training_args.seed)
+
+    if data_args.dataset_name == "funsd":
+        # datasets = load_dataset("nielsr/funsd")
+        import funsd
+
+        datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir)
+    else:
+        raise NotImplementedError()
+
+    column_names = datasets["test"].column_names
+    features = datasets["test"].features
+
+    text_column_name = "words" if "words" in column_names else "tokens"
+    boxes_column_name = "bboxes"
+
+    label_column_name = (
+        f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1]
+    )
+
+    remove_columns = column_names
+
+    # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the
+    # unique labels.
+    def get_label_list(labels):
+        unique_labels = set()
+        for label in labels:
+            unique_labels = unique_labels | set(label)
+        label_list = sorted(unique_labels)
+        return label_list
+
+    if isinstance(features[label_column_name].feature, ClassLabel):
+        label_list = features[label_column_name].feature.names
+        # No need to convert the labels since they are already ints.
+        label_to_id = {i: i for i in range(len(label_list))}
+    else:
+        label_list = get_label_list(datasets["train"][label_column_name])
+        label_to_id = {l: i for i, l in enumerate(label_list)}
+    num_labels = len(label_list)
+
+    # Load pretrained model and tokenizer
+    #
+    # Distributed training:
+    # The .from_pretrained methods guarantee that only one local process can concurrently
+    # download model & vocab.
+    config = AutoConfig.from_pretrained(
+        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
+        num_labels=num_labels,
+        finetuning_task=data_args.task_name,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        input_size=data_args.input_size,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
+        tokenizer_file=None,
+        # avoid loading from a cached file of the pre-trained model in another
+        # machine
+        cache_dir=model_args.cache_dir,
+        use_fast=True,
+        add_prefix_space=True,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+
+    # Tokenizer check: this script requires a fast tokenizer.
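+    # (The label alignment in tokenize_and_align_labels() below relies on word_ids(),
+    # which only fast tokenizers provide.)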
+ if not isinstance(tokenizer, PreTrainedTokenizerFast): + raise ValueError( + "This example script only works for models that have a fast tokenizer. Checkout the big table of models " + "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " + "requirement" + ) + + # Preprocessing the dataset + # Padding strategy + padding = "max_length" if data_args.pad_to_max_length else False + + if data_args.visual_embed: + imagenet_default_mean_and_std = data_args.imagenet_default_mean_and_std + mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN + std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD + common_transform = Compose( + [ + # transforms.ColorJitter(0.4, 0.4, 0.4), + # transforms.RandomHorizontalFlip(p=0.5), + RandomResizedCropAndInterpolationWithTwoPic( + size=data_args.input_size, + interpolation=data_args.train_interpolation, + ), + ] + ) + + patch_transform = transforms.Compose( + [ + transforms.ToTensor(), + transforms.Normalize(mean=torch.tensor(mean), std=torch.tensor(std)), + ] + ) + + # Tokenize all texts and align the labels with them. + def tokenize_and_align_labels(examples, augmentation=False): + tokenized_inputs = tokenizer( + examples[text_column_name], + padding=False, + truncation=True, + return_overflowing_tokens=True, + boxes=examples[boxes_column_name], + ) + + labels = [] + bboxes = [] + images = [] + for batch_index in range(len(tokenized_inputs["input_ids"])): + word_ids = tokenized_inputs.word_ids(batch_index=batch_index) + org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] + + label = examples[label_column_name][org_batch_index] + bbox = examples["bboxes"][org_batch_index] + previous_word_idx = None + label_ids = [] + bbox_inputs = [] + for word_idx in word_ids: + # Special tokens have a word id that is None. We set the label to -100 so they are automatically + # ignored in the loss function. + if word_idx is None: + label_ids.append(-100) + bbox_inputs.append([0, 0, 0, 0]) + # We set the label for the first token of each word. + elif word_idx != previous_word_idx: + label_ids.append(label_to_id[label[word_idx]]) + bbox_inputs.append(bbox[word_idx]) + # For the other tokens in a word, we set the label to either the current label or -100, depending on + # the label_all_tokens flag. 
+ else: + label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) + bbox_inputs.append(bbox[word_idx]) + previous_word_idx = word_idx + labels.append(label_ids) + bboxes.append(bbox_inputs) + + if data_args.visual_embed: + ipath = examples["image_path"][org_batch_index] + img = pil_loader(ipath) + for_patches, _ = common_transform(img, augmentation=augmentation) + patch = patch_transform(for_patches) + images.append(patch) + + tokenized_inputs["labels"] = labels + tokenized_inputs["bbox"] = bboxes + if data_args.visual_embed: + tokenized_inputs["images"] = images + + return tokenized_inputs + + validation_name = "test" + if validation_name not in datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_dataset = datasets[validation_name] + if data_args.max_val_samples is not None: + eval_dataset = eval_dataset.select(range(data_args.max_val_samples)) + eval_dataset = eval_dataset.map( + tokenize_and_align_labels, + batched=True, + remove_columns=remove_columns, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) + + # Metrics + metric = load_metric("seqeval") + + def compute_metrics(p): + predictions, labels = p + predictions = np.argmax(predictions, axis=2) + + # Remove ignored index (special tokens) + true_predictions = [ + [label_list[p] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + true_labels = [ + [label_list[l] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + + results = metric.compute(predictions=true_predictions, references=true_labels) + if data_args.return_entity_level_metrics: + # Unpack nested dictionaries + final_results = {} + for key, value in results.items(): + if isinstance(value, dict): + for n, v in value.items(): + final_results[f"{key}_{n}"] = v + else: + final_results[key] = value + return final_results + else: + return { + "precision": results["overall_precision"], + "recall": results["overall_recall"], + "f1": results["overall_f1"], + "accuracy": results["overall_accuracy"], + } + + # Evaluation + from model import ORTModel + + def eval_func(model): + logger.info("*** Evaluate ***") + ort_model = ORTModel( + model, + compute_metrics=compute_metrics, + ) + outputs = ort_model.evaluation_loop(eval_dataset) + return outputs.metrics["f1"] + + if model_args.tune: + from neural_compressor import PostTrainingQuantConfig, quantization + from neural_compressor.utils.constant import FP32 + import onnx + + onnx_model = onnx.load(model_args.input_model) + calib_dataset = IncDataset(eval_dataset, onnx_model) + # TODO double check it for better perf + fp32_op_names = [ + "/layoutlmv2/encoder/layer.\\d+/output/dense/MatMul", + ] + config = PostTrainingQuantConfig( + approach="static", + quant_level=1, + quant_format=model_args.quant_format, + op_name_dict={op_name: FP32 for op_name in fp32_op_names}, + ) + # recipes={'smooth_quant': True, 'smooth_quant_args': {'alpha': 0.5}}, + # TODO double-check the sq, some issues + q_model = quantization.fit( + onnx_model, + config, + eval_func=eval_func, + calib_dataloader=DataLoader(framework="onnxruntime", dataset=calib_dataset, batch_size=1), + ) + q_model.save(model_args.save_path) + + if model_args.benchmark: + import onnx + onnx_model = onnx.load(model_args.input_model) + if model_args.mode == "performance": + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + 
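+            # Performance mode: feed the preprocessed evaluation set through ONNX Runtime for a fixed
+            # number of iterations. The settings below (100 iterations, one 28-core instance) are just
+            # the defaults used in this example; adjust them to match the target machine.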
+ b_dataset = IncDataset(eval_dataset, onnx_model) + conf = BenchmarkConfig( + iteration=100, + cores_per_instance=28, + num_of_instance=1, + ) + b_dataloader = DataLoader( + framework="onnxruntime", + dataset=b_dataset, + batch_size=model_args.batch_size, + ) + fit(onnx_model, conf, b_dataloader=b_dataloader) + elif model_args.mode == "accuracy": + eval_f1 = eval_func(onnx_model) + print("Batch size = %d" % model_args.batch_size) + print("Accuracy: %.5f" % eval_f1) + + +if __name__ == "__main__": + main() diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/model.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/model.py index 95076d77bb8..b97a006a702 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/model.py @@ -1,103 +1,103 @@ -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -from typing import Callable, Dict, List, Optional, Union - -import numpy as np -from datasets import Dataset -from transformers import EvalPrediction -from transformers.trainer_pt_utils import nested_concat -from transformers.trainer_utils import EvalLoopOutput -import onnxruntime - -logger = logging.getLogger(__name__) - - -class ORTModel: - def __init__( - self, - model: Union[str, os.PathLike], - compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, - label_names: Optional[List[str]] = None, - ): - """ - Args: - model_path (`Union[str, os.PathLike]`): - The path to the model ONNX Intermediate Representation (IR). - execution_provider (:obj:`str`, `optional`): - ONNX Runtime execution provider to use. - compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): - The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and - return a dictionary string to metric values. - label_names (`List[str]`, `optional`): - The list of keys in your dictionary of inputs that correspond to the labels. - """ - self.compute_metrics = compute_metrics - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession( - model.SerializeToString(), providers=onnxruntime.get_available_providers() - ) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - - def evaluation_loop(self, dataset: Dataset): - """ - Run evaluation and returns metrics and predictions. - - Args: - dataset (`datasets.Dataset`): - Dataset to use for the evaluation step. 
- """ - logger.info("***** Running evaluation *****") - all_preds = None - all_labels = None - onnx_inputs = {} - for step, inputs in enumerate(dataset): - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - """ - LayoutLMV2 inputs (with order): - { - 'input_ids': {0: 'batch_size', 1: 'sequence_length'}, - 'bbox': {0: 'batch_size', 1: 'sequence_length'}, - 'image': {0: 'batch_size', 1: 'num_channels'}, # dtype is np.int64 not float - 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, - } - """ - for key in self.onnx_input_names: - if key in inputs: - onnx_inputs[key] = np.array([inputs[key]]) - elif key == "image": - onnx_inputs[key] = np.array([inputs["images"]], dtype=np.int64) - preds = self.session.run(None, onnx_inputs) - if len(preds) == 1: - preds = preds[0] - all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) - all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) - if self.compute_metrics is not None and all_preds is not None and all_labels is not None: - metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) - else: - metrics = {} - return EvalLoopOutput( - predictions=all_preds, - label_ids=all_labels, - metrics=metrics, - num_samples=len(dataset), - ) +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +from typing import Callable, Dict, List, Optional, Union + +import numpy as np +from datasets import Dataset +from transformers import EvalPrediction +from transformers.trainer_pt_utils import nested_concat +from transformers.trainer_utils import EvalLoopOutput +import onnxruntime + +logger = logging.getLogger(__name__) + + +class ORTModel: + def __init__( + self, + model: Union[str, os.PathLike], + compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, + label_names: Optional[List[str]] = None, + ): + """ + Args: + model_path (`Union[str, os.PathLike]`): + The path to the model ONNX Intermediate Representation (IR). + execution_provider (:obj:`str`, `optional`): + ONNX Runtime execution provider to use. + compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): + The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and + return a dictionary string to metric values. + label_names (`List[str]`, `optional`): + The list of keys in your dictionary of inputs that correspond to the labels. 
+ """ + self.compute_metrics = compute_metrics + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession( + model.SerializeToString(), providers=onnxruntime.get_available_providers() + ) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + + def evaluation_loop(self, dataset: Dataset): + """ + Run evaluation and returns metrics and predictions. + + Args: + dataset (`datasets.Dataset`): + Dataset to use for the evaluation step. + """ + logger.info("***** Running evaluation *****") + all_preds = None + all_labels = None + onnx_inputs = {} + for step, inputs in enumerate(dataset): + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + """ + LayoutLMV2 inputs (with order): + { + 'input_ids': {0: 'batch_size', 1: 'sequence_length'}, + 'bbox': {0: 'batch_size', 1: 'sequence_length'}, + 'image': {0: 'batch_size', 1: 'num_channels'}, # dtype is np.int64 not float + 'attention_mask': {0: 'batch_size', 1: 'sequence_length'}, + } + """ + for key in self.onnx_input_names: + if key in inputs: + onnx_inputs[key] = np.array([inputs[key]]) + elif key == "image": + onnx_inputs[key] = np.array([inputs["images"]], dtype=np.int64) + preds = self.session.run(None, onnx_inputs) + if len(preds) == 1: + preds = preds[0] + all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) + all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) + if self.compute_metrics is not None and all_preds is not None and all_labels is not None: + metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) + else: + metrics = {} + return EvalLoopOutput( + predictions=all_preds, + label_ids=all_labels, + metrics=metrics, + num_samples=len(dataset), + ) diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/requirements.txt similarity index 94% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/requirements.txt index 0058f979157..55309000436 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/requirements.txt @@ -1,21 +1,21 @@ -accelerate -datasets -transformers -seqeval -tensorboard -sentencepiece -timm -Pillow -einops -textdistance -shapely -protobuf==3.20.0 -torch -torchvision 
-setuptools -onnx -onnxruntime -onnxruntime-extensions; python_version < '3.11' -pytesseract -#detectron2 # TODO export require it, resolve it later +accelerate +datasets +transformers +seqeval +tensorboard +sentencepiece +timm +Pillow +einops +textdistance +shapely +protobuf==3.20.0 +torch +torchvision +setuptools +onnx +onnxruntime +onnxruntime-extensions; python_version < '3.11' +pytesseract +#detectron2 # TODO export require it, resolve it later # git+https://github.com/facebookresearch/detectron2.git \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv2/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/funsd.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/funsd.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/funsd.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/funsd.py index 164714c7aaf..6cf416f4796 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/funsd.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/funsd.py @@ -1,136 +1,136 @@ -# coding=utf-8 -''' -Reference: https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py -''' -import json -import os - -import datasets - -from utils import load_image, normalize_bbox - - -logger = datasets.logging.get_logger(__name__) - - -_CITATION = """\ -@article{Jaume2019FUNSDAD, - title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, - author={Guillaume Jaume and H. K. Ekenel and J. 
Thiran}, - journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, - year={2019}, - volume={2}, - pages={1-6} -} -""" - -_DESCRIPTION = """\ -https://guillaumejaume.github.io/FUNSD/ -""" - - -class FunsdConfig(datasets.BuilderConfig): - """BuilderConfig for FUNSD""" - - def __init__(self, **kwargs): - """BuilderConfig for FUNSD. - - Args: - **kwargs: keyword arguments forwarded to super. - """ - super(FunsdConfig, self).__init__(**kwargs) - - -class Funsd(datasets.GeneratorBasedBuilder): - """Conll2003 dataset.""" - - BUILDER_CONFIGS = [ - FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "tokens": datasets.Sequence(datasets.Value("string")), - "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] - ) - ), - "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), - "image_path": datasets.Value("string"), - } - ), - supervised_keys=None, - homepage="https://guillaumejaume.github.io/FUNSD/", - citation=_CITATION, - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} - ), - ] - - def get_line_bbox(self, bboxs): - x = [bboxs[i][j] for i in range(len(bboxs)) for j in range(0, len(bboxs[i]), 2)] - y = [bboxs[i][j] for i in range(len(bboxs)) for j in range(1, len(bboxs[i]), 2)] - - x0, y0, x1, y1 = min(x), min(y), max(x), max(y) - - assert x1 >= x0 and y1 >= y0 - bbox = [[x0, y0, x1, y1] for _ in range(len(bboxs))] - return bbox - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - ann_dir = os.path.join(filepath, "annotations") - img_dir = os.path.join(filepath, "images") - for guid, file in enumerate(sorted(os.listdir(ann_dir))): - tokens = [] - bboxes = [] - ner_tags = [] - - file_path = os.path.join(ann_dir, file) - with open(file_path, "r", encoding="utf8") as f: - data = json.load(f) - image_path = os.path.join(img_dir, file) - image_path = image_path.replace("json", "png") - image, size = load_image(image_path) - for item in data["form"]: - cur_line_bboxes = [] - words, label = item["words"], item["label"] - words = [w for w in words if w["text"].strip() != ""] - if len(words) == 0: - continue - if label == "other": - for w in words: - tokens.append(w["text"]) - ner_tags.append("O") - cur_line_bboxes.append(normalize_bbox(w["box"], size)) - else: - tokens.append(words[0]["text"]) - ner_tags.append("B-" + label.upper()) - cur_line_bboxes.append(normalize_bbox(words[0]["box"], size)) - for w in words[1:]: - tokens.append(w["text"]) - ner_tags.append("I-" + label.upper()) - cur_line_bboxes.append(normalize_bbox(w["box"], size)) - # by default: --segment_level_layout 1 - # if do not want to use segment_level_layout, comment the following line - cur_line_bboxes = self.get_line_bbox(cur_line_bboxes) - # box = normalize_bbox(item["box"], size) - # 
cur_line_bboxes = [box for _ in range(len(words))] - bboxes.extend(cur_line_bboxes) - yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, +# coding=utf-8 +''' +Reference: https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py +''' +import json +import os + +import datasets + +from utils import load_image, normalize_bbox + + +logger = datasets.logging.get_logger(__name__) + + +_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" + +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. + """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """Conll2003 dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "tokens": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), + "image_path": datasets.Value("string"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def get_line_bbox(self, bboxs): + x = [bboxs[i][j] for i in range(len(bboxs)) for j in range(0, len(bboxs[i]), 2)] + y = [bboxs[i][j] for i in range(len(bboxs)) for j in range(1, len(bboxs[i]), 2)] + + x0, y0, x1, y1 = min(x), min(y), max(x), max(y) + + assert x1 >= x0 and y1 >= y0 + bbox = [[x0, y0, x1, y1] for _ in range(len(bboxs))] + return bbox + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + tokens = [] + bboxes = [] + ner_tags = [] + + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + cur_line_bboxes = [] + words, label = item["words"], item["label"] + words = [w for w in words if w["text"].strip() != ""] + if len(words) == 0: + continue + if label == "other": + for w in words: + 
tokens.append(w["text"]) + ner_tags.append("O") + cur_line_bboxes.append(normalize_bbox(w["box"], size)) + else: + tokens.append(words[0]["text"]) + ner_tags.append("B-" + label.upper()) + cur_line_bboxes.append(normalize_bbox(words[0]["box"], size)) + for w in words[1:]: + tokens.append(w["text"]) + ner_tags.append("I-" + label.upper()) + cur_line_bboxes.append(normalize_bbox(w["box"], size)) + # by default: --segment_level_layout 1 + # if do not want to use segment_level_layout, comment the following line + cur_line_bboxes = self.get_line_bbox(cur_line_bboxes) + # box = normalize_bbox(item["box"], size) + # cur_line_bboxes = [box for _ in range(len(words))] + bboxes.extend(cur_line_bboxes) + yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, "image": image, "image_path": image_path} \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py index 5bb42553e5b..25ea7892019 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/main.py @@ -1,503 +1,503 @@ -#!/usr/bin/env python -# coding=utf-8 -import logging -import os -import sys -from dataclasses import dataclass, field -from typing import Optional - -import numpy as np -from datasets import ClassLabel, load_dataset, load_metric - -import transformers -from transformers import ( - AutoConfig, - AutoModelForTokenClassification, - AutoTokenizer, - HfArgumentParser, - PreTrainedTokenizerFast, - Trainer, - TrainingArguments, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.5.0") - -from utils import RandomResizedCropAndInterpolationWithTwoPic, pil_loader, Compose - -from timm.data.constants import \ - IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD -from torchvision import transforms -import torch -import onnxruntime -import onnx - -from neural_compressor.data import DataLoader - -import pyarrow_hotfix; pyarrow_hotfix.uninstall() - -logger = logging.getLogger(__name__) - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
- """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." - }, - ) - input_model: str = field( - default=None, - metadata={"help": "Path to onnx model"} - ) - tune: bool = field( - default=False, - metadata={"help": ("INC tune")}, - ) - benchmark: bool = field( - default=False, - metadata={"help": ("INC benchmark")}, - ) - mode: str = field( - default='performance', - metadata={"help": ("INC benchmark mode")}, - ) - save_path: str = field( - default=None, - metadata={"help": ("onnx int8 model path")}, - ) - batch_size: int = field( - default=1, - metadata={"help": ("batch size for benchmark")}, - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - """ - - task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) - dataset_name: Optional[str] = field( - default='funsd', metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field( - default=None, metadata={"help": "The input training data file (a csv or JSON file)."} - ) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_val_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." 
- }, - ) - max_test_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of test examples to this " - "value if set." - }, - ) - label_all_tokens: bool = field( - default=False, - metadata={ - "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " - "one (in which case the other tokens will have a padding index)." - }, - ) - return_entity_level_metrics: bool = field( - default=False, - metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, - ) - segment_level_layout: bool = field(default=True) - visual_embed: bool = field(default=True) - data_dir: Optional[str] = field(default=None) - input_size: int = field(default=224, metadata={"help": "images input size for backbone"}) - second_input_size: int = field(default=112, metadata={"help": "images input size for discrete vae"}) - train_interpolation: str = field( - default='bicubic', metadata={"help": "Training interpolation (random, bilinear, bicubic)"}) - second_interpolation: str = field( - default='lanczos', metadata={"help": "Interpolation for discrete vae (random, bilinear, bicubic)"}) - imagenet_default_mean_and_std: bool = field(default=False, metadata={"help": ""}) - - -class IncDataset(): - def __init__(self, - dataset, - model, - label_names=None,): - self.dataset = dataset - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self._process_dataset() - - def _process_dataset(self): - self.label = [] - self.onnx_inputs = [] - for inputs in self.dataset: - onnx_inputs = [] - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - self.label.append(labels) - for key in self.onnx_input_names: - if key in inputs: - # onnx_inputs[key] = np.array([inputs[key]]) - onnx_inputs.append(np.array(inputs[key])) - # self.onnx_inputs.append([np.array(inputs[key])]) - elif key == 'pixel_values': - # onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) - onnx_inputs.append(np.array(inputs['images'], dtype=np.float32)) - # self.onnx_inputs.append([np.array(inputs['images'], dtype=np.float32)]) - - self.onnx_inputs.append(onnx_inputs) - onnx_inputs =[] - - def __getitem__(self, index): - return tuple(self.onnx_inputs[index]), self.label[index] - - def __len__(self): - assert len(self.label) == len(self.onnx_inputs) - return len(self.onnx_inputs) - -def main(): - # See all possible arguments in layoutlmft/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. 
- model_args, data_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - # Set the verbosity to info of the Transformers logger (on main process only): - if is_main_process(training_args.local_rank): - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - logger.info(f"Training/evaluation parameters {training_args}") - - # Set seed before initializing model. - set_seed(training_args.seed) - - if data_args.dataset_name == 'funsd': - # datasets = load_dataset("nielsr/funsd") - import funsd - datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir) - else: - raise NotImplementedError() - - column_names = datasets["test"].column_names - features = datasets["test"].features - - text_column_name = "words" if "words" in column_names else "tokens" - boxes_column_name = "bboxes" - - label_column_name = ( - f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] - ) - - remove_columns = column_names - - # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the - # unique labels. - def get_label_list(labels): - unique_labels = set() - for label in labels: - unique_labels = unique_labels | set(label) - label_list = list(unique_labels) - label_list.sort() - return label_list - - if isinstance(features[label_column_name].feature, ClassLabel): - label_list = features[label_column_name].feature.names - # No need to convert the labels since they are already ints. - label_to_id = {i: i for i in range(len(label_list))} - else: - label_list = get_label_list(datasets["train"][label_column_name]) - label_to_id = {l: i for i, l in enumerate(label_list)} - num_labels = len(label_list) - - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. - config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - num_labels=num_labels, - finetuning_task=data_args.task_name, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - input_size=data_args.input_size, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - tokenizer_file=None, # avoid loading from a cached file of the pre-trained model in another machine - cache_dir=model_args.cache_dir, - use_fast=True, - add_prefix_space=True, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - - # Tokenizer check: this script requires a fast tokenizer. 
- if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " - "requirement" - ) - - # Preprocessing the dataset - # Padding strategy - padding = "max_length" if data_args.pad_to_max_length else False - - if data_args.visual_embed: - imagenet_default_mean_and_std = data_args.imagenet_default_mean_and_std - mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN - std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD - common_transform = Compose([ - # transforms.ColorJitter(0.4, 0.4, 0.4), - # transforms.RandomHorizontalFlip(p=0.5), - RandomResizedCropAndInterpolationWithTwoPic( - size=data_args.input_size, interpolation=data_args.train_interpolation), - ]) - - patch_transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize( - mean=torch.tensor(mean), - std=torch.tensor(std)) - ]) - - # Tokenize all texts and align the labels with them. - def tokenize_and_align_labels(examples, augmentation=False): - tokenized_inputs = tokenizer( - examples[text_column_name], - padding=False, - truncation=True, - return_overflowing_tokens=True, - boxes=examples[boxes_column_name], - ) - - labels = [] - bboxes = [] - images = [] - for batch_index in range(len(tokenized_inputs["input_ids"])): - word_ids = tokenized_inputs.word_ids(batch_index=batch_index) - org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] - - label = examples[label_column_name][org_batch_index] - bbox = examples["bboxes"][org_batch_index] - previous_word_idx = None - label_ids = [] - bbox_inputs = [] - for word_idx in word_ids: - # Special tokens have a word id that is None. We set the label to -100 so they are automatically - # ignored in the loss function. - if word_idx is None: - label_ids.append(-100) - bbox_inputs.append([0, 0, 0, 0]) - # We set the label for the first token of each word. - elif word_idx != previous_word_idx: - label_ids.append(label_to_id[label[word_idx]]) - bbox_inputs.append(bbox[word_idx]) - # For the other tokens in a word, we set the label to either the current label or -100, depending on - # the label_all_tokens flag. 
- else: - label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) - bbox_inputs.append(bbox[word_idx]) - previous_word_idx = word_idx - labels.append(label_ids) - bboxes.append(bbox_inputs) - - if data_args.visual_embed: - ipath = examples["image_path"][org_batch_index] - img = pil_loader(ipath) - for_patches, _ = common_transform(img, augmentation=augmentation) - patch = patch_transform(for_patches) - images.append(patch) - - tokenized_inputs["labels"] = labels - tokenized_inputs["bbox"] = bboxes - if data_args.visual_embed: - tokenized_inputs["images"] = images - - return tokenized_inputs - - validation_name = "test" - if validation_name not in datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_dataset = datasets[validation_name] - if data_args.max_val_samples is not None: - eval_dataset = eval_dataset.select(range(data_args.max_val_samples)) - eval_dataset = eval_dataset.map( - tokenize_and_align_labels, - batched=True, - remove_columns=remove_columns, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) - - # Metrics - metric = load_metric("seqeval") - - def compute_metrics(p): - predictions, labels = p - predictions = np.argmax(predictions, axis=2) - - # Remove ignored index (special tokens) - true_predictions = [ - [label_list[p] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - true_labels = [ - [label_list[l] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - - results = metric.compute(predictions=true_predictions, references=true_labels) - if data_args.return_entity_level_metrics: - # Unpack nested dictionaries - final_results = {} - for key, value in results.items(): - if isinstance(value, dict): - for n, v in value.items(): - final_results[f"{key}_{n}"] = v - else: - final_results[key] = value - return final_results - else: - return { - "precision": results["overall_precision"], - "recall": results["overall_recall"], - "f1": results["overall_f1"], - "accuracy": results["overall_accuracy"], - } - - # Evaluation - from model import ORTModel - - def eval_func(model): - logger.info("*** Evaluate ***") - ort_model = ORTModel( - model, - compute_metrics=compute_metrics, - ) - outputs = ort_model.evaluation_loop(eval_dataset) - return outputs.metrics['f1'] - - - if model_args.tune: - onnx_model = onnx.load(model_args.input_model) - - from neural_compressor import quantization, PostTrainingQuantConfig - - calib_dataset = IncDataset(eval_dataset, onnx_model) - config = PostTrainingQuantConfig(approach='dynamic') - q_model = quantization.fit(onnx_model, - config, - eval_func=eval_func) - q_model.save(model_args.save_path) - - if model_args.benchmark: - onnx_model = onnx.load(model_args.input_model) - if model_args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - b_dataset = IncDataset(eval_dataset, onnx_model) - conf = BenchmarkConfig(iteration=100, - cores_per_instance=28, - num_of_instance=1) - b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) - fit(onnx_model, conf, b_dataloader=b_dataloader) - elif model_args.mode == 'accuracy': - eval_f1 = eval_func(onnx_model) - print("Batch size = %d" % model_args.batch_size) - print("Accuracy: %.5f" % eval_f1) - -if __name__ == "__main__": +#!/usr/bin/env python +# coding=utf-8 +import logging 
+import os +import sys +from dataclasses import dataclass, field +from typing import Optional + +import numpy as np +from datasets import ClassLabel, load_dataset, load_metric + +import transformers +from transformers import ( + AutoConfig, + AutoModelForTokenClassification, + AutoTokenizer, + HfArgumentParser, + PreTrainedTokenizerFast, + Trainer, + TrainingArguments, + set_seed, +) +from transformers.trainer_utils import get_last_checkpoint, is_main_process +from transformers.utils import check_min_version + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.5.0") + +from utils import RandomResizedCropAndInterpolationWithTwoPic, pil_loader, Compose + +from timm.data.constants import \ + IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD +from torchvision import transforms +import torch +import onnxruntime +import onnx + +from neural_compressor.data import DataLoader + +import pyarrow_hotfix; pyarrow_hotfix.uninstall() + +logger = logging.getLogger(__name__) + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + input_model: str = field( + default=None, + metadata={"help": "Path to onnx model"} + ) + tune: bool = field( + default=False, + metadata={"help": ("INC tune")}, + ) + benchmark: bool = field( + default=False, + metadata={"help": ("INC benchmark")}, + ) + mode: str = field( + default='performance', + metadata={"help": ("INC benchmark mode")}, + ) + save_path: str = field( + default=None, + metadata={"help": ("onnx int8 model path")}, + ) + batch_size: int = field( + default=1, + metadata={"help": ("batch size for benchmark")}, + ) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) + dataset_name: Optional[str] = field( + default='funsd', metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field( + default=None, metadata={"help": "The input training data file (a csv or JSON file)."} + ) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_val_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + label_all_tokens: bool = field( + default=False, + metadata={ + "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." 
+ }, + ) + return_entity_level_metrics: bool = field( + default=False, + metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, + ) + segment_level_layout: bool = field(default=True) + visual_embed: bool = field(default=True) + data_dir: Optional[str] = field(default=None) + input_size: int = field(default=224, metadata={"help": "images input size for backbone"}) + second_input_size: int = field(default=112, metadata={"help": "images input size for discrete vae"}) + train_interpolation: str = field( + default='bicubic', metadata={"help": "Training interpolation (random, bilinear, bicubic)"}) + second_interpolation: str = field( + default='lanczos', metadata={"help": "Interpolation for discrete vae (random, bilinear, bicubic)"}) + imagenet_default_mean_and_std: bool = field(default=False, metadata={"help": ""}) + + +class IncDataset(): + def __init__(self, + dataset, + model, + label_names=None,): + self.dataset = dataset + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + self._process_dataset() + + def _process_dataset(self): + self.label = [] + self.onnx_inputs = [] + for inputs in self.dataset: + onnx_inputs = [] + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + self.label.append(labels) + for key in self.onnx_input_names: + if key in inputs: + # onnx_inputs[key] = np.array([inputs[key]]) + onnx_inputs.append(np.array(inputs[key])) + # self.onnx_inputs.append([np.array(inputs[key])]) + elif key == 'pixel_values': + # onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) + onnx_inputs.append(np.array(inputs['images'], dtype=np.float32)) + # self.onnx_inputs.append([np.array(inputs['images'], dtype=np.float32)]) + + self.onnx_inputs.append(onnx_inputs) + onnx_inputs =[] + + def __getitem__(self, index): + return tuple(self.onnx_inputs[index]), self.label[index] + + def __len__(self): + assert len(self.label) == len(self.onnx_inputs) + return len(self.onnx_inputs) + +def main(): + # See all possible arguments in layoutlmft/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
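An editorial aside on the parsing step that follows (not part of the diff): HfArgumentParser accepts either ordinary command-line flags or a single path to a JSON file that fills all of the argument dataclasses at once, and parse_json_file returns one instance per dataclass. The toy dataclass and file below are assumptions used only to illustrate the mechanism; they are not the example's own classes.

# Self-contained sketch of HfArgumentParser.parse_json_file (hypothetical ToyArgs, not run_ner.py's classes)
import json
import tempfile
from dataclasses import dataclass, field
from transformers import HfArgumentParser

@dataclass
class ToyArgs:
    model_name_or_path: str = field(default="microsoft/layoutlmv3-base")
    tune: bool = field(default=False)

# write a throwaway JSON config, the same shape a user would pass as `python run_ner.py args.json`
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump({"model_name_or_path": "my-model", "tune": True}, f)

# parse_json_file returns a tuple with one parsed dataclass per type given to the parser
(toy,) = HfArgumentParser(ToyArgs).parse_json_file(json_file=f.name)
print(toy.tune)  # True

Because run_ner.py builds its parser from three dataclasses (ModelArguments, DataTrainingArguments, TrainingArguments), the JSON branch must unpack three values, as in the corrected line below.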
+        model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1]))
+    else:
+        model_args, data_args, training_args = parser.parse_args_into_dataclasses()
+
+    # Setup logging
+    logging.basicConfig(
+        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+        datefmt="%m/%d/%Y %H:%M:%S",
+        handlers=[logging.StreamHandler(sys.stdout)],
+    )
+    logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN)
+
+    # Log on each process the small summary:
+    logger.warning(
+        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
+        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
+    )
+    # Set the verbosity to info of the Transformers logger (on main process only):
+    if is_main_process(training_args.local_rank):
+        transformers.utils.logging.set_verbosity_info()
+        transformers.utils.logging.enable_default_handler()
+        transformers.utils.logging.enable_explicit_format()
+    logger.info(f"Training/evaluation parameters {training_args}")
+
+    # Set seed before initializing model.
+    set_seed(training_args.seed)
+
+    if data_args.dataset_name == 'funsd':
+        # datasets = load_dataset("nielsr/funsd")
+        import funsd
+        datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir)
+    else:
+        raise NotImplementedError()
+
+    column_names = datasets["test"].column_names
+    features = datasets["test"].features
+
+    text_column_name = "words" if "words" in column_names else "tokens"
+    boxes_column_name = "bboxes"
+
+    label_column_name = (
+        f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1]
+    )
+
+    remove_columns = column_names
+
+    # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the
+    # unique labels.
+    def get_label_list(labels):
+        unique_labels = set()
+        for label in labels:
+            unique_labels = unique_labels | set(label)
+        label_list = list(unique_labels)
+        label_list.sort()
+        return label_list
+
+    if isinstance(features[label_column_name].feature, ClassLabel):
+        label_list = features[label_column_name].feature.names
+        # No need to convert the labels since they are already ints.
+        label_to_id = {i: i for i in range(len(label_list))}
+    else:
+        label_list = get_label_list(datasets["train"][label_column_name])
+        label_to_id = {l: i for i, l in enumerate(label_list)}
+    num_labels = len(label_list)
+
+    # Load pretrained model and tokenizer
+    #
+    # Distributed training:
+    # The .from_pretrained methods guarantee that only one local process can concurrently
+    # download model & vocab.
+    config = AutoConfig.from_pretrained(
+        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
+        num_labels=num_labels,
+        finetuning_task=data_args.task_name,
+        cache_dir=model_args.cache_dir,
+        revision=model_args.model_revision,
+        input_size=data_args.input_size,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+    tokenizer = AutoTokenizer.from_pretrained(
+        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
+        tokenizer_file=None, # avoid loading from a cached file of the pre-trained model in another machine
+        cache_dir=model_args.cache_dir,
+        use_fast=True,
+        add_prefix_space=True,
+        revision=model_args.model_revision,
+        use_auth_token=True if model_args.use_auth_token else None,
+    )
+
+    # Tokenizer check: this script requires a fast tokenizer.
+ if not isinstance(tokenizer, PreTrainedTokenizerFast): + raise ValueError( + "This example script only works for models that have a fast tokenizer. Checkout the big table of models " + "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " + "requirement" + ) + + # Preprocessing the dataset + # Padding strategy + padding = "max_length" if data_args.pad_to_max_length else False + + if data_args.visual_embed: + imagenet_default_mean_and_std = data_args.imagenet_default_mean_and_std + mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN + std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD + common_transform = Compose([ + # transforms.ColorJitter(0.4, 0.4, 0.4), + # transforms.RandomHorizontalFlip(p=0.5), + RandomResizedCropAndInterpolationWithTwoPic( + size=data_args.input_size, interpolation=data_args.train_interpolation), + ]) + + patch_transform = transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize( + mean=torch.tensor(mean), + std=torch.tensor(std)) + ]) + + # Tokenize all texts and align the labels with them. + def tokenize_and_align_labels(examples, augmentation=False): + tokenized_inputs = tokenizer( + examples[text_column_name], + padding=False, + truncation=True, + return_overflowing_tokens=True, + boxes=examples[boxes_column_name], + ) + + labels = [] + bboxes = [] + images = [] + for batch_index in range(len(tokenized_inputs["input_ids"])): + word_ids = tokenized_inputs.word_ids(batch_index=batch_index) + org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] + + label = examples[label_column_name][org_batch_index] + bbox = examples["bboxes"][org_batch_index] + previous_word_idx = None + label_ids = [] + bbox_inputs = [] + for word_idx in word_ids: + # Special tokens have a word id that is None. We set the label to -100 so they are automatically + # ignored in the loss function. + if word_idx is None: + label_ids.append(-100) + bbox_inputs.append([0, 0, 0, 0]) + # We set the label for the first token of each word. + elif word_idx != previous_word_idx: + label_ids.append(label_to_id[label[word_idx]]) + bbox_inputs.append(bbox[word_idx]) + # For the other tokens in a word, we set the label to either the current label or -100, depending on + # the label_all_tokens flag. 
+ else: + label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) + bbox_inputs.append(bbox[word_idx]) + previous_word_idx = word_idx + labels.append(label_ids) + bboxes.append(bbox_inputs) + + if data_args.visual_embed: + ipath = examples["image_path"][org_batch_index] + img = pil_loader(ipath) + for_patches, _ = common_transform(img, augmentation=augmentation) + patch = patch_transform(for_patches) + images.append(patch) + + tokenized_inputs["labels"] = labels + tokenized_inputs["bbox"] = bboxes + if data_args.visual_embed: + tokenized_inputs["images"] = images + + return tokenized_inputs + + validation_name = "test" + if validation_name not in datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_dataset = datasets[validation_name] + if data_args.max_val_samples is not None: + eval_dataset = eval_dataset.select(range(data_args.max_val_samples)) + eval_dataset = eval_dataset.map( + tokenize_and_align_labels, + batched=True, + remove_columns=remove_columns, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) + + # Metrics + metric = load_metric("seqeval") + + def compute_metrics(p): + predictions, labels = p + predictions = np.argmax(predictions, axis=2) + + # Remove ignored index (special tokens) + true_predictions = [ + [label_list[p] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + true_labels = [ + [label_list[l] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + + results = metric.compute(predictions=true_predictions, references=true_labels) + if data_args.return_entity_level_metrics: + # Unpack nested dictionaries + final_results = {} + for key, value in results.items(): + if isinstance(value, dict): + for n, v in value.items(): + final_results[f"{key}_{n}"] = v + else: + final_results[key] = value + return final_results + else: + return { + "precision": results["overall_precision"], + "recall": results["overall_recall"], + "f1": results["overall_f1"], + "accuracy": results["overall_accuracy"], + } + + # Evaluation + from model import ORTModel + + def eval_func(model): + logger.info("*** Evaluate ***") + ort_model = ORTModel( + model, + compute_metrics=compute_metrics, + ) + outputs = ort_model.evaluation_loop(eval_dataset) + return outputs.metrics['f1'] + + + if model_args.tune: + onnx_model = onnx.load(model_args.input_model) + + from neural_compressor import quantization, PostTrainingQuantConfig + + calib_dataset = IncDataset(eval_dataset, onnx_model) + config = PostTrainingQuantConfig(approach='dynamic') + q_model = quantization.fit(onnx_model, + config, + eval_func=eval_func) + q_model.save(model_args.save_path) + + if model_args.benchmark: + onnx_model = onnx.load(model_args.input_model) + if model_args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + b_dataset = IncDataset(eval_dataset, onnx_model) + conf = BenchmarkConfig(iteration=100, + cores_per_instance=28, + num_of_instance=1) + b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) + fit(onnx_model, conf, b_dataloader=b_dataloader) + elif model_args.mode == 'accuracy': + eval_f1 = eval_func(onnx_model) + print("Batch size = %d" % model_args.batch_size) + print("Accuracy: %.5f" % eval_f1) + +if __name__ == "__main__": main() \ No newline at end of file diff --git 
a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/model.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/model.py index 6f0845c6843..004a9f17b66 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/model.py @@ -1,88 +1,88 @@ -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -from typing import Callable, Dict, List, Optional, Union - -import numpy as np -from datasets import Dataset -from transformers import EvalPrediction -from transformers.trainer_pt_utils import nested_concat -from transformers.trainer_utils import EvalLoopOutput -import onnxruntime - -logger = logging.getLogger(__name__) - -class ORTModel: - def __init__( - self, - model: Union[str, os.PathLike], - compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, - label_names: Optional[List[str]] = None, - ): - """ - Args: - model_path (`Union[str, os.PathLike]`): - The path to the model ONNX Intermediate Representation (IR). - execution_provider (:obj:`str`, `optional`): - ONNX Runtime execution provider to use. - compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): - The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and - return a dictionary string to metric values. - label_names (`List[str]`, `optional`): - The list of keys in your dictionary of inputs that correspond to the labels. - """ - self.compute_metrics = compute_metrics - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - - def evaluation_loop(self, dataset: Dataset): - """ - Run evaluation and returns metrics and predictions. - - Args: - dataset (`datasets.Dataset`): - Dataset to use for the evaluation step. 
- """ - logger.info("***** Running evaluation *****") - all_preds = None - all_labels = None - onnx_inputs = {} - for step, inputs in enumerate(dataset): - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - - for key in self.onnx_input_names: - if key in inputs: - onnx_inputs[key] = np.array([inputs[key]]) - elif key == 'pixel_values': - onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) - preds = self.session.run(None, onnx_inputs) - if len(preds) == 1: - preds = preds[0] - all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) - all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) - if self.compute_metrics is not None and all_preds is not None and all_labels is not None: - metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) - else: - metrics = {} - return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset)) +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +from typing import Callable, Dict, List, Optional, Union + +import numpy as np +from datasets import Dataset +from transformers import EvalPrediction +from transformers.trainer_pt_utils import nested_concat +from transformers.trainer_utils import EvalLoopOutput +import onnxruntime + +logger = logging.getLogger(__name__) + +class ORTModel: + def __init__( + self, + model: Union[str, os.PathLike], + compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, + label_names: Optional[List[str]] = None, + ): + """ + Args: + model_path (`Union[str, os.PathLike]`): + The path to the model ONNX Intermediate Representation (IR). + execution_provider (:obj:`str`, `optional`): + ONNX Runtime execution provider to use. + compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): + The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and + return a dictionary string to metric values. + label_names (`List[str]`, `optional`): + The list of keys in your dictionary of inputs that correspond to the labels. + """ + self.compute_metrics = compute_metrics + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + + def evaluation_loop(self, dataset: Dataset): + """ + Run evaluation and returns metrics and predictions. + + Args: + dataset (`datasets.Dataset`): + Dataset to use for the evaluation step. 
+ """ + logger.info("***** Running evaluation *****") + all_preds = None + all_labels = None + onnx_inputs = {} + for step, inputs in enumerate(dataset): + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + + for key in self.onnx_input_names: + if key in inputs: + onnx_inputs[key] = np.array([inputs[key]]) + elif key == 'pixel_values': + onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) + preds = self.session.run(None, onnx_inputs) + if len(preds) == 1: + preds = preds[0] + all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) + all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) + if self.compute_metrics is not None and all_preds is not None and all_labels is not None: + metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) + else: + metrics = {} + return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset)) diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt similarity index 92% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt index 80ac2e1ca53..3b0a527da51 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/requirements.txt @@ -1,19 +1,19 @@ -accelerate -datasets -transformers -seqeval -tensorboard -sentencepiece -timm -fvcore -Pillow -einops -textdistance -shapely -protobuf -torch -torchvision -setuptools -onnx -onnxruntime +accelerate +datasets +transformers +seqeval +tensorboard +sentencepiece +timm +fvcore +Pillow +einops +textdistance +shapely +protobuf +torch +torchvision +setuptools +onnx +onnxruntime onnxruntime-extensions; python_version < '3.11' \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/run_benchmark.sh diff --git 
a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/utils.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/utils.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/utils.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/utils.py index c8c36be4931..4635c541873 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/utils.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_dynamic/utils.py @@ -1,462 +1,462 @@ -import torchvision.transforms.functional as F -import warnings -import math -import random -import numpy as np -from PIL import Image -import torch - -from fvcore.transforms.transform import TransformList, Transform - -from iopath.common.file_io import PathManager as PathManagerBase -PathManager = PathManagerBase() - -_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] - -# https://www.exiv2.org/tags.html -_EXIF_ORIENT = 274 # exif 'Orientation' tag - -def _apply_exif_orientation(image): - """ - Applies the exif orientation correctly. - - This code exists per the bug: - https://github.com/python-pillow/Pillow/issues/3973 - with the function `ImageOps.exif_transpose`. The Pillow source raises errors with - various methods, especially `tobytes` - - Function based on: - https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 - https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 - - Args: - image (PIL.Image): a PIL image - - Returns: - (PIL.Image): the PIL image with exif orientation applied, if applicable - """ - if not hasattr(image, "getexif"): - return image - - try: - exif = image.getexif() - except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 - exif = None - - if exif is None: - return image - - orientation = exif.get(_EXIF_ORIENT) - - method = { - 2: Image.FLIP_LEFT_RIGHT, - 3: Image.ROTATE_180, - 4: Image.FLIP_TOP_BOTTOM, - 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, - 7: Image.TRANSVERSE, - 8: Image.ROTATE_90, - }.get(orientation) - - if method is not None: - return image.transpose(method) - return image - - -def convert_PIL_to_numpy(image, format): - """ - Convert PIL image to numpy array of target format. 
- - Args: - image (PIL.Image): a PIL image - format (str): the format of output image - - Returns: - (np.ndarray): also see `read_image` - """ - if format is not None: - # PIL only supports RGB, so convert to RGB and flip channels over below - conversion_format = format - if format in ["BGR", "YUV-BT.601"]: - conversion_format = "RGB" - image = image.convert(conversion_format) - image = np.asarray(image) - # PIL squeezes out the channel dimension for "L", so make it HWC - if format == "L": - image = np.expand_dims(image, -1) - - # handle formats not supported by PIL - elif format == "BGR": - # flip channels if needed - image = image[:, :, ::-1] - elif format == "YUV-BT.601": - image = image / 255.0 - image = np.dot(image, np.array(_M_RGB2YUV).T) - - return image - - -def read_image(file_name, format=None): - """ - Read an image into the given format. - Will apply rotation and flipping if the image has such exif information. - - Args: - file_name (str): image file path - format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". - - Returns: - image (np.ndarray): - an HWC image in the given format, which is 0-255, uint8 for - supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. - """ - with PathManager.open(file_name, "rb") as f: - image = Image.open(f) - - # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 - image = _apply_exif_orientation(image) - return convert_PIL_to_numpy(image, format) - - -class ResizeTransform(Transform): - """ - Resize the image to a target size. - """ - - def __init__(self, h, w, new_h, new_w, interp=None): - """ - Args: - h, w (int): original image size - new_h, new_w (int): new image size - interp: PIL interpolation methods, defaults to bilinear. - """ - # TODO decide on PIL vs opencv - super().__init__() - if interp is None: - interp = Image.BILINEAR - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - assert img.shape[:2] == (self.h, self.w) - assert len(img.shape) <= 4 - interp_method = interp if interp is not None else self.interp - - if img.dtype == np.uint8: - if len(img.shape) > 2 and img.shape[2] == 1: - pil_image = Image.fromarray(img[:, :, 0], mode="L") - else: - pil_image = Image.fromarray(img) - pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) - ret = np.asarray(pil_image) - if len(img.shape) > 2 and img.shape[2] == 1: - ret = np.expand_dims(ret, -1) - else: - # PIL only supports uint8 - if any(x < 0 for x in img.strides): - img = np.ascontiguousarray(img) - img = torch.from_numpy(img) - shape = list(img.shape) - shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] - img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw - _PIL_RESIZE_TO_INTERPOLATE_MODE = { - Image.NEAREST: "nearest", - Image.BILINEAR: "bilinear", - Image.BICUBIC: "bicubic", - } - mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] - align_corners = None if mode == "nearest" else False - img = torch.nn.functional.interpolate( - img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners - ) - shape[:2] = (self.new_h, self.new_w) - ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) - - return ret - - def apply_coords(self, coords): - coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) - coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) - return coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - def inverse(self): - return 
ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) - - -def normalize_bbox(bbox, size): - return [ - int(1000 * bbox[0] / size[0]), - int(1000 * bbox[1] / size[1]), - int(1000 * bbox[2] / size[0]), - int(1000 * bbox[3] / size[1]), - ] - - -def load_image(image_path): - image = read_image(image_path, format="BGR") - h = image.shape[0] - w = image.shape[1] - img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) - image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable - return image, (w, h) - - -def crop(image, i, j, h, w, boxes=None): - cropped_image = F.crop(image, i, j, h, w) - - if boxes is not None: - # Currently we cannot use this case since when some boxes is out of the cropped image, - # it may be better to drop out these boxes along with their text input (instead of min or clamp) - # which haven't been implemented here - max_size = torch.as_tensor([w, h], dtype=torch.float32) - cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) - cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) - cropped_boxes = cropped_boxes.clamp(min=0) - boxes = cropped_boxes.reshape(-1, 4) - - return cropped_image, boxes - - -def resize(image, size, interpolation, boxes=None): - # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, - # which is compatible with a square image size of 224x224 - rescaled_image = F.resize(image, size, interpolation) - - if boxes is None: - return rescaled_image, None - - ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) - ratio_width, ratio_height = ratios - - # boxes = boxes.copy() - scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) - - return rescaled_image, scaled_boxes - - -def clamp(num, min_value, max_value): - return max(min(num, max_value), min_value) - - -def get_bb(bb, page_size): - bbs = [float(j) for j in bb] - xs, ys = [], [] - for i, b in enumerate(bbs): - if i % 2 == 0: - xs.append(b) - else: - ys.append(b) - (width, height) = page_size - return_bb = [ - clamp(min(xs), 0, width - 1), - clamp(min(ys), 0, height - 1), - clamp(max(xs), 0, width - 1), - clamp(max(ys), 0, height - 1), - ] - return_bb = [ - int(1000 * return_bb[0] / width), - int(1000 * return_bb[1] / height), - int(1000 * return_bb[2] / width), - int(1000 * return_bb[3] / height), - ] - return return_bb - - -class ToNumpy: - - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return np_img - - -class ToTensor: - - def __init__(self, dtype=torch.float32): - self.dtype = dtype - - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return torch.from_numpy(np_img).to(dtype=self.dtype) - - -_pil_interpolation_to_str = { - F.InterpolationMode.NEAREST: 'F.InterpolationMode.NEAREST', - F.InterpolationMode.BILINEAR: 'F.InterpolationMode.BILINEAR', - F.InterpolationMode.BICUBIC: 'F.InterpolationMode.BICUBIC', - F.InterpolationMode.LANCZOS: 'F.InterpolationMode.LANCZOS', - F.InterpolationMode.HAMMING: 'F.InterpolationMode.HAMMING', - F.InterpolationMode.BOX: 'F.InterpolationMode.BOX', -} - - -def _pil_interp(method): - if method == 'bicubic': - return F.InterpolationMode.BICUBIC - 
elif method == 'lanczos': - return F.InterpolationMode.LANCZOS - elif method == 'hamming': - return F.InterpolationMode.HAMMING - else: - # default bilinear, do we want to allow nearest? - return F.InterpolationMode.BILINEAR - - -class Compose: - """Composes several transforms together. This transform does not support torchscript. - Please, see the note below. - - Args: - transforms (list of ``Transform`` objects): list of transforms to compose. - - Example: - >>> transforms.Compose([ - >>> transforms.CenterCrop(10), - >>> transforms.PILToTensor(), - >>> transforms.ConvertImageDtype(torch.float), - >>> ]) - - .. note:: - In order to script the transformations, please use ``torch.nn.Sequential`` as below. - - >>> transforms = torch.nn.Sequential( - >>> transforms.CenterCrop(10), - >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - >>> ) - >>> scripted_transforms = torch.jit.script(transforms) - - Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require - `lambda` functions or ``PIL.Image``. - - """ - - def __init__(self, transforms): - self.transforms = transforms - - def __call__(self, img, augmentation=False, box=None): - for t in self.transforms: - img = t(img, augmentation, box) - return img - - -class RandomResizedCropAndInterpolationWithTwoPic: - """Crop the given PIL Image to random size and aspect ratio with random interpolation. - A crop of random size (default: of 0.08 to 1.0) of the original size and a random - aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop - is finally resized to given size. - This is popularly used to train the Inception networks. - Args: - size: expected output size of each edge - scale: range of size of the origin size cropped - ratio: range of aspect ratio of the origin aspect ratio cropped - interpolation: Default: PIL.Image.BILINEAR - """ - - def __init__(self, size, second_size=None, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation='bilinear', second_interpolation='lanczos'): - if isinstance(size, tuple): - self.size = size - else: - self.size = (size, size) - if second_size is not None: - if isinstance(second_size, tuple): - self.second_size = second_size - else: - self.second_size = (second_size, second_size) - else: - self.second_size = None - if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): - warnings.warn("range should be of kind (min, max)") - - self.interpolation = _pil_interp(interpolation) - self.second_interpolation = _pil_interp(second_interpolation) - self.scale = scale - self.ratio = ratio - - @staticmethod - def get_params(img, scale, ratio): - """Get parameters for ``crop`` for a random sized crop. - Args: - img (PIL Image): Image to be cropped. - scale (tuple): range of size of the origin size cropped - ratio (tuple): range of aspect ratio of the origin aspect ratio cropped - Returns: - tuple: params (i, j, h, w) to be passed to ``crop`` for a random - sized crop. 
- """ - area = img.size[0] * img.size[1] - - for attempt in range(10): - target_area = random.uniform(*scale) * area - log_ratio = (math.log(ratio[0]), math.log(ratio[1])) - aspect_ratio = math.exp(random.uniform(*log_ratio)) - - w = int(round(math.sqrt(target_area * aspect_ratio))) - h = int(round(math.sqrt(target_area / aspect_ratio))) - - if w <= img.size[0] and h <= img.size[1]: - i = random.randint(0, img.size[1] - h) - j = random.randint(0, img.size[0] - w) - return i, j, h, w - - # Fallback to central crop - in_ratio = img.size[0] / img.size[1] - if in_ratio < min(ratio): - w = img.size[0] - h = int(round(w / min(ratio))) - elif in_ratio > max(ratio): - h = img.size[1] - w = int(round(h * max(ratio))) - else: # whole image - w = img.size[0] - h = img.size[1] - i = (img.size[1] - h) // 2 - j = (img.size[0] - w) // 2 - return i, j, h, w - - def __call__(self, img, augmentation=False, box=None): - """ - Args: - img (PIL Image): Image to be cropped and resized. - Returns: - PIL Image: Randomly cropped and resized image. - """ - if augmentation: - i, j, h, w = self.get_params(img, self.scale, self.ratio) - img = F.crop(img, i, j, h, w) - # img, box = crop(img, i, j, h, w, box) - img = F.resize(img, self.size, self.interpolation) - second_img = F.resize(img, self.second_size, self.second_interpolation) \ - if self.second_size is not None else None - return img, second_img - - def __repr__(self): - if isinstance(self.interpolation, (tuple, list)): - interpolate_str = ' '.join([_pil_interpolation_to_str[x] for x in self.interpolation]) - else: - interpolate_str = _pil_interpolation_to_str[self.interpolation] - format_string = self.__class__.__name__ + '(size={0}'.format(self.size) - format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale)) - format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio)) - format_string += ', interpolation={0}'.format(interpolate_str) - if self.second_size is not None: - format_string += ', second_size={0}'.format(self.second_size) - format_string += ', second_interpolation={0}'.format(_pil_interpolation_to_str[self.second_interpolation]) - format_string += ')' - return format_string - - -def pil_loader(path: str) -> Image.Image: - # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') - +import torchvision.transforms.functional as F +import warnings +import math +import random +import numpy as np +from PIL import Image +import torch + +from fvcore.transforms.transform import TransformList, Transform + +from iopath.common.file_io import PathManager as PathManagerBase +PathManager = PathManagerBase() + +_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] + +# https://www.exiv2.org/tags.html +_EXIF_ORIENT = 274 # exif 'Orientation' tag + +def _apply_exif_orientation(image): + """ + Applies the exif orientation correctly. + + This code exists per the bug: + https://github.com/python-pillow/Pillow/issues/3973 + with the function `ImageOps.exif_transpose`. 
The Pillow source raises errors with + various methods, especially `tobytes` + + Function based on: + https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 + https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 + + Args: + image (PIL.Image): a PIL image + + Returns: + (PIL.Image): the PIL image with exif orientation applied, if applicable + """ + if not hasattr(image, "getexif"): + return image + + try: + exif = image.getexif() + except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 + exif = None + + if exif is None: + return image + + orientation = exif.get(_EXIF_ORIENT) + + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + + if method is not None: + return image.transpose(method) + return image + + +def convert_PIL_to_numpy(image, format): + """ + Convert PIL image to numpy array of target format. + + Args: + image (PIL.Image): a PIL image + format (str): the format of output image + + Returns: + (np.ndarray): also see `read_image` + """ + if format is not None: + # PIL only supports RGB, so convert to RGB and flip channels over below + conversion_format = format + if format in ["BGR", "YUV-BT.601"]: + conversion_format = "RGB" + image = image.convert(conversion_format) + image = np.asarray(image) + # PIL squeezes out the channel dimension for "L", so make it HWC + if format == "L": + image = np.expand_dims(image, -1) + + # handle formats not supported by PIL + elif format == "BGR": + # flip channels if needed + image = image[:, :, ::-1] + elif format == "YUV-BT.601": + image = image / 255.0 + image = np.dot(image, np.array(_M_RGB2YUV).T) + + return image + + +def read_image(file_name, format=None): + """ + Read an image into the given format. + Will apply rotation and flipping if the image has such exif information. + + Args: + file_name (str): image file path + format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". + + Returns: + image (np.ndarray): + an HWC image in the given format, which is 0-255, uint8 for + supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. + """ + with PathManager.open(file_name, "rb") as f: + image = Image.open(f) + + # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 + image = _apply_exif_orientation(image) + return convert_PIL_to_numpy(image, format) + + +class ResizeTransform(Transform): + """ + Resize the image to a target size. + """ + + def __init__(self, h, w, new_h, new_w, interp=None): + """ + Args: + h, w (int): original image size + new_h, new_w (int): new image size + interp: PIL interpolation methods, defaults to bilinear. 
+ """ + # TODO decide on PIL vs opencv + super().__init__() + if interp is None: + interp = Image.BILINEAR + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + assert img.shape[:2] == (self.h, self.w) + assert len(img.shape) <= 4 + interp_method = interp if interp is not None else self.interp + + if img.dtype == np.uint8: + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + else: + # PIL only supports uint8 + if any(x < 0 for x in img.strides): + img = np.ascontiguousarray(img) + img = torch.from_numpy(img) + shape = list(img.shape) + shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] + img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw + _PIL_RESIZE_TO_INTERPOLATE_MODE = { + Image.NEAREST: "nearest", + Image.BILINEAR: "bilinear", + Image.BICUBIC: "bicubic", + } + mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] + align_corners = None if mode == "nearest" else False + img = torch.nn.functional.interpolate( + img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners + ) + shape[:2] = (self.new_h, self.new_w) + ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) + + return ret + + def apply_coords(self, coords): + coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) + coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) + return coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + def inverse(self): + return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) + + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + + +def load_image(image_path): + image = read_image(image_path, format="BGR") + h = image.shape[0] + w = image.shape[1] + img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) + image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable + return image, (w, h) + + +def crop(image, i, j, h, w, boxes=None): + cropped_image = F.crop(image, i, j, h, w) + + if boxes is not None: + # Currently we cannot use this case since when some boxes is out of the cropped image, + # it may be better to drop out these boxes along with their text input (instead of min or clamp) + # which haven't been implemented here + max_size = torch.as_tensor([w, h], dtype=torch.float32) + cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) + cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) + cropped_boxes = cropped_boxes.clamp(min=0) + boxes = cropped_boxes.reshape(-1, 4) + + return cropped_image, boxes + + +def resize(image, size, interpolation, boxes=None): + # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, + # which is compatible with a square image size of 224x224 + rescaled_image = F.resize(image, size, interpolation) + + if boxes is None: + return rescaled_image, None + + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) + ratio_width, ratio_height = ratios + + # boxes = boxes.copy() + scaled_boxes = boxes * 
torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) + + return rescaled_image, scaled_boxes + + +def clamp(num, min_value, max_value): + return max(min(num, max_value), min_value) + + +def get_bb(bb, page_size): + bbs = [float(j) for j in bb] + xs, ys = [], [] + for i, b in enumerate(bbs): + if i % 2 == 0: + xs.append(b) + else: + ys.append(b) + (width, height) = page_size + return_bb = [ + clamp(min(xs), 0, width - 1), + clamp(min(ys), 0, height - 1), + clamp(max(xs), 0, width - 1), + clamp(max(ys), 0, height - 1), + ] + return_bb = [ + int(1000 * return_bb[0] / width), + int(1000 * return_bb[1] / height), + int(1000 * return_bb[2] / width), + int(1000 * return_bb[3] / height), + ] + return return_bb + + +class ToNumpy: + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return np_img + + +class ToTensor: + + def __init__(self, dtype=torch.float32): + self.dtype = dtype + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return torch.from_numpy(np_img).to(dtype=self.dtype) + + +_pil_interpolation_to_str = { + F.InterpolationMode.NEAREST: 'F.InterpolationMode.NEAREST', + F.InterpolationMode.BILINEAR: 'F.InterpolationMode.BILINEAR', + F.InterpolationMode.BICUBIC: 'F.InterpolationMode.BICUBIC', + F.InterpolationMode.LANCZOS: 'F.InterpolationMode.LANCZOS', + F.InterpolationMode.HAMMING: 'F.InterpolationMode.HAMMING', + F.InterpolationMode.BOX: 'F.InterpolationMode.BOX', +} + + +def _pil_interp(method): + if method == 'bicubic': + return F.InterpolationMode.BICUBIC + elif method == 'lanczos': + return F.InterpolationMode.LANCZOS + elif method == 'hamming': + return F.InterpolationMode.HAMMING + else: + # default bilinear, do we want to allow nearest? + return F.InterpolationMode.BILINEAR + + +class Compose: + """Composes several transforms together. This transform does not support torchscript. + Please, see the note below. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> transforms.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.PILToTensor(), + >>> transforms.ConvertImageDtype(torch.float), + >>> ]) + + .. note:: + In order to script the transformations, please use ``torch.nn.Sequential`` as below. + + >>> transforms = torch.nn.Sequential( + >>> transforms.CenterCrop(10), + >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + >>> ) + >>> scripted_transforms = torch.jit.script(transforms) + + Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require + `lambda` functions or ``PIL.Image``. + + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, augmentation=False, box=None): + for t in self.transforms: + img = t(img, augmentation, box) + return img + + +class RandomResizedCropAndInterpolationWithTwoPic: + """Crop the given PIL Image to random size and aspect ratio with random interpolation. + A crop of random size (default: of 0.08 to 1.0) of the original size and a random + aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop + is finally resized to given size. + This is popularly used to train the Inception networks. 
+ Args: + size: expected output size of each edge + scale: range of size of the origin size cropped + ratio: range of aspect ratio of the origin aspect ratio cropped + interpolation: Default: PIL.Image.BILINEAR + """ + + def __init__(self, size, second_size=None, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), + interpolation='bilinear', second_interpolation='lanczos'): + if isinstance(size, tuple): + self.size = size + else: + self.size = (size, size) + if second_size is not None: + if isinstance(second_size, tuple): + self.second_size = second_size + else: + self.second_size = (second_size, second_size) + else: + self.second_size = None + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("range should be of kind (min, max)") + + self.interpolation = _pil_interp(interpolation) + self.second_interpolation = _pil_interp(second_interpolation) + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img, scale, ratio): + """Get parameters for ``crop`` for a random sized crop. + Args: + img (PIL Image): Image to be cropped. + scale (tuple): range of size of the origin size cropped + ratio (tuple): range of aspect ratio of the origin aspect ratio cropped + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. + """ + area = img.size[0] * img.size[1] + + for attempt in range(10): + target_area = random.uniform(*scale) * area + log_ratio = (math.log(ratio[0]), math.log(ratio[1])) + aspect_ratio = math.exp(random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if w <= img.size[0] and h <= img.size[1]: + i = random.randint(0, img.size[1] - h) + j = random.randint(0, img.size[0] - w) + return i, j, h, w + + # Fallback to central crop + in_ratio = img.size[0] / img.size[1] + if in_ratio < min(ratio): + w = img.size[0] + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = img.size[1] + w = int(round(h * max(ratio))) + else: # whole image + w = img.size[0] + h = img.size[1] + i = (img.size[1] - h) // 2 + j = (img.size[0] - w) // 2 + return i, j, h, w + + def __call__(self, img, augmentation=False, box=None): + """ + Args: + img (PIL Image): Image to be cropped and resized. + Returns: + PIL Image: Randomly cropped and resized image. 
+ """ + if augmentation: + i, j, h, w = self.get_params(img, self.scale, self.ratio) + img = F.crop(img, i, j, h, w) + # img, box = crop(img, i, j, h, w, box) + img = F.resize(img, self.size, self.interpolation) + second_img = F.resize(img, self.second_size, self.second_interpolation) \ + if self.second_size is not None else None + return img, second_img + + def __repr__(self): + if isinstance(self.interpolation, (tuple, list)): + interpolate_str = ' '.join([_pil_interpolation_to_str[x] for x in self.interpolation]) + else: + interpolate_str = _pil_interpolation_to_str[self.interpolation] + format_string = self.__class__.__name__ + '(size={0}'.format(self.size) + format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale)) + format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio)) + format_string += ', interpolation={0}'.format(interpolate_str) + if self.second_size is not None: + format_string += ', second_size={0}'.format(self.second_size) + format_string += ', second_interpolation={0}'.format(_pil_interpolation_to_str[self.second_interpolation]) + format_string += ')' + return format_string + + +def pil_loader(path: str) -> Image.Image: + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/funsd.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/funsd.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/funsd.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/funsd.py index 164714c7aaf..6cf416f4796 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/funsd.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/funsd.py @@ -1,136 +1,136 @@ -# coding=utf-8 -''' -Reference: https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py -''' -import json -import os - -import datasets - -from utils import load_image, normalize_bbox - - -logger = datasets.logging.get_logger(__name__) - - -_CITATION = """\ -@article{Jaume2019FUNSDAD, - title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, - author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, - journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, - year={2019}, - volume={2}, - pages={1-6} -} -""" - -_DESCRIPTION = """\ -https://guillaumejaume.github.io/FUNSD/ -""" - - -class FunsdConfig(datasets.BuilderConfig): - """BuilderConfig for FUNSD""" - - def __init__(self, **kwargs): - """BuilderConfig for FUNSD. - - Args: - **kwargs: keyword arguments forwarded to super. 
- """ - super(FunsdConfig, self).__init__(**kwargs) - - -class Funsd(datasets.GeneratorBasedBuilder): - """Conll2003 dataset.""" - - BUILDER_CONFIGS = [ - FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "tokens": datasets.Sequence(datasets.Value("string")), - "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] - ) - ), - "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), - "image_path": datasets.Value("string"), - } - ), - supervised_keys=None, - homepage="https://guillaumejaume.github.io/FUNSD/", - citation=_CITATION, - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} - ), - ] - - def get_line_bbox(self, bboxs): - x = [bboxs[i][j] for i in range(len(bboxs)) for j in range(0, len(bboxs[i]), 2)] - y = [bboxs[i][j] for i in range(len(bboxs)) for j in range(1, len(bboxs[i]), 2)] - - x0, y0, x1, y1 = min(x), min(y), max(x), max(y) - - assert x1 >= x0 and y1 >= y0 - bbox = [[x0, y0, x1, y1] for _ in range(len(bboxs))] - return bbox - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - ann_dir = os.path.join(filepath, "annotations") - img_dir = os.path.join(filepath, "images") - for guid, file in enumerate(sorted(os.listdir(ann_dir))): - tokens = [] - bboxes = [] - ner_tags = [] - - file_path = os.path.join(ann_dir, file) - with open(file_path, "r", encoding="utf8") as f: - data = json.load(f) - image_path = os.path.join(img_dir, file) - image_path = image_path.replace("json", "png") - image, size = load_image(image_path) - for item in data["form"]: - cur_line_bboxes = [] - words, label = item["words"], item["label"] - words = [w for w in words if w["text"].strip() != ""] - if len(words) == 0: - continue - if label == "other": - for w in words: - tokens.append(w["text"]) - ner_tags.append("O") - cur_line_bboxes.append(normalize_bbox(w["box"], size)) - else: - tokens.append(words[0]["text"]) - ner_tags.append("B-" + label.upper()) - cur_line_bboxes.append(normalize_bbox(words[0]["box"], size)) - for w in words[1:]: - tokens.append(w["text"]) - ner_tags.append("I-" + label.upper()) - cur_line_bboxes.append(normalize_bbox(w["box"], size)) - # by default: --segment_level_layout 1 - # if do not want to use segment_level_layout, comment the following line - cur_line_bboxes = self.get_line_bbox(cur_line_bboxes) - # box = normalize_bbox(item["box"], size) - # cur_line_bboxes = [box for _ in range(len(words))] - bboxes.extend(cur_line_bboxes) - yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, +# coding=utf-8 +''' +Reference: https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py +''' +import json +import os + +import datasets + +from utils import load_image, normalize_bbox + + +logger = datasets.logging.get_logger(__name__) + + 
+_CITATION = """\ +@article{Jaume2019FUNSDAD, + title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, + author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, + journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, + year={2019}, + volume={2}, + pages={1-6} +} +""" + +_DESCRIPTION = """\ +https://guillaumejaume.github.io/FUNSD/ +""" + + +class FunsdConfig(datasets.BuilderConfig): + """BuilderConfig for FUNSD""" + + def __init__(self, **kwargs): + """BuilderConfig for FUNSD. + + Args: + **kwargs: keyword arguments forwarded to super. + """ + super(FunsdConfig, self).__init__(**kwargs) + + +class Funsd(datasets.GeneratorBasedBuilder): + """Conll2003 dataset.""" + + BUILDER_CONFIGS = [ + FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), + ] + + def _info(self): + return datasets.DatasetInfo( + description=_DESCRIPTION, + features=datasets.Features( + { + "id": datasets.Value("string"), + "tokens": datasets.Sequence(datasets.Value("string")), + "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), + "ner_tags": datasets.Sequence( + datasets.features.ClassLabel( + names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] + ) + ), + "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), + "image_path": datasets.Value("string"), + } + ), + supervised_keys=None, + homepage="https://guillaumejaume.github.io/FUNSD/", + citation=_CITATION, + ) + + def _split_generators(self, dl_manager): + """Returns SplitGenerators.""" + downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") + return [ + datasets.SplitGenerator( + name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} + ), + ] + + def get_line_bbox(self, bboxs): + x = [bboxs[i][j] for i in range(len(bboxs)) for j in range(0, len(bboxs[i]), 2)] + y = [bboxs[i][j] for i in range(len(bboxs)) for j in range(1, len(bboxs[i]), 2)] + + x0, y0, x1, y1 = min(x), min(y), max(x), max(y) + + assert x1 >= x0 and y1 >= y0 + bbox = [[x0, y0, x1, y1] for _ in range(len(bboxs))] + return bbox + + def _generate_examples(self, filepath): + logger.info("⏳ Generating examples from = %s", filepath) + ann_dir = os.path.join(filepath, "annotations") + img_dir = os.path.join(filepath, "images") + for guid, file in enumerate(sorted(os.listdir(ann_dir))): + tokens = [] + bboxes = [] + ner_tags = [] + + file_path = os.path.join(ann_dir, file) + with open(file_path, "r", encoding="utf8") as f: + data = json.load(f) + image_path = os.path.join(img_dir, file) + image_path = image_path.replace("json", "png") + image, size = load_image(image_path) + for item in data["form"]: + cur_line_bboxes = [] + words, label = item["words"], item["label"] + words = [w for w in words if w["text"].strip() != ""] + if len(words) == 0: + continue + if label == "other": + for w in words: + tokens.append(w["text"]) + ner_tags.append("O") + cur_line_bboxes.append(normalize_bbox(w["box"], size)) + else: + tokens.append(words[0]["text"]) + ner_tags.append("B-" + label.upper()) + cur_line_bboxes.append(normalize_bbox(words[0]["box"], size)) + for w in words[1:]: + tokens.append(w["text"]) + ner_tags.append("I-" + label.upper()) + cur_line_bboxes.append(normalize_bbox(w["box"], size)) + # by default: --segment_level_layout 1 + # 
if do not want to use segment_level_layout, comment the following line + cur_line_bboxes = self.get_line_bbox(cur_line_bboxes) + # box = normalize_bbox(item["box"], size) + # cur_line_bboxes = [box for _ in range(len(words))] + bboxes.extend(cur_line_bboxes) + yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, "image": image, "image_path": image_path} \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py index ee6f45ade70..28d9f382b4f 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/main.py @@ -1,515 +1,515 @@ -#!/usr/bin/env python -# coding=utf-8 -import logging -import os -import sys -from dataclasses import dataclass, field -from typing import Optional - -import numpy as np -from datasets import ClassLabel, load_dataset, load_metric - -import transformers -from transformers import ( - AutoConfig, - AutoModelForTokenClassification, - AutoTokenizer, - HfArgumentParser, - PreTrainedTokenizerFast, - Trainer, - TrainingArguments, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.5.0") - -from utils import RandomResizedCropAndInterpolationWithTwoPic, pil_loader, Compose - -from timm.data.constants import \ - IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD -from torchvision import transforms -import torch -import onnxruntime -import onnx - -from neural_compressor.data import DataLoader - -import pyarrow_hotfix; pyarrow_hotfix.uninstall() - -logger = logging.getLogger(__name__) - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. - """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." 
- }, - ) - input_model: str = field( - default=None, - metadata={"help": "Path to onnx model"} - ) - tune: bool = field( - default=False, - metadata={"help": ("INC tune")}, - ) - benchmark: bool = field( - default=False, - metadata={"help": ("INC benchmark")}, - ) - mode: str = field( - default='performance', - metadata={"help": ("INC benchmark mode")}, - ) - save_path: str = field( - default=None, - metadata={"help": ("onnx int8 model path")}, - ) - batch_size: int = field( - default=1, - metadata={"help": ("batch size for benchmark")}, - ) - quant_format: str = field( - default="QOperator", - metadata={"help": ("quant format")}, - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - """ - - task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) - dataset_name: Optional[str] = field( - default='funsd', metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field( - default=None, metadata={"help": "The input training data file (a csv or JSON file)."} - ) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to model maximum sentence length. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " - "efficient on GPU but very bad for TPU." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." - }, - ) - max_val_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " - "value if set." - }, - ) - max_test_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of test examples to this " - "value if set." - }, - ) - label_all_tokens: bool = field( - default=False, - metadata={ - "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " - "one (in which case the other tokens will have a padding index)." 
- }, - ) - return_entity_level_metrics: bool = field( - default=False, - metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, - ) - segment_level_layout: bool = field(default=True) - visual_embed: bool = field(default=True) - data_dir: Optional[str] = field(default=None) - input_size: int = field(default=224, metadata={"help": "images input size for backbone"}) - second_input_size: int = field(default=112, metadata={"help": "images input size for discrete vae"}) - train_interpolation: str = field( - default='bicubic', metadata={"help": "Training interpolation (random, bilinear, bicubic)"}) - second_interpolation: str = field( - default='lanczos', metadata={"help": "Interpolation for discrete vae (random, bilinear, bicubic)"}) - imagenet_default_mean_and_std: bool = field(default=False, metadata={"help": ""}) - - -class IncDataset(): - def __init__(self, - dataset, - model, - label_names=None,): - self.dataset = dataset - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - self._process_dataset() - - def _process_dataset(self): - self.label = [] - self.onnx_inputs = [] - for inputs in self.dataset: - onnx_inputs = [] - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - self.label.append(labels) - for key in self.onnx_input_names: - if key in inputs: - # onnx_inputs[key] = np.array([inputs[key]]) - onnx_inputs.append(np.array(inputs[key])) - # self.onnx_inputs.append([np.array(inputs[key])]) - elif key == 'pixel_values': - # onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) - onnx_inputs.append(np.array(inputs['images'], dtype=np.float32)) - # self.onnx_inputs.append([np.array(inputs['images'], dtype=np.float32)]) - - self.onnx_inputs.append(onnx_inputs) - onnx_inputs =[] - - def __getitem__(self, index): - return tuple(self.onnx_inputs[index]), self.label[index] - - def __len__(self): - assert len(self.label) == len(self.onnx_inputs) - return len(self.onnx_inputs) - -def main(): - # See all possible arguments in layoutlmft/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. 
- model_args, data_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - # Set the verbosity to info of the Transformers logger (on main process only): - if is_main_process(training_args.local_rank): - transformers.utils.logging.set_verbosity_info() - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - logger.info(f"Training/evaluation parameters {training_args}") - - # Set seed before initializing model. - set_seed(training_args.seed) - - if data_args.dataset_name == 'funsd': - # datasets = load_dataset("nielsr/funsd") - import funsd - datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir) - else: - raise NotImplementedError() - - column_names = datasets["test"].column_names - features = datasets["test"].features - - text_column_name = "words" if "words" in column_names else "tokens" - boxes_column_name = "bboxes" - - label_column_name = ( - f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] - ) - - remove_columns = column_names - - # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the - # unique labels. - def get_label_list(labels): - unique_labels = set() - for label in labels: - unique_labels = unique_labels | set(label) - label_list = list(unique_labels) - label_list.sort() - return label_list - - if isinstance(features[label_column_name].feature, ClassLabel): - label_list = features[label_column_name].feature.names - # No need to convert the labels since they are already ints. - label_to_id = {i: i for i in range(len(label_list))} - else: - label_list = get_label_list(datasets["train"][label_column_name]) - label_to_id = {l: i for i, l in enumerate(label_list)} - num_labels = len(label_list) - - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. - config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - num_labels=num_labels, - finetuning_task=data_args.task_name, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - input_size=data_args.input_size, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - tokenizer_file=None, # avoid loading from a cached file of the pre-trained model in another machine - cache_dir=model_args.cache_dir, - use_fast=True, - add_prefix_space=True, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - - # Tokenizer check: this script requires a fast tokenizer. 
- if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this " - "requirement" - ) - - # Preprocessing the dataset - # Padding strategy - padding = "max_length" if data_args.pad_to_max_length else False - - if data_args.visual_embed: - imagenet_default_mean_and_std = data_args.imagenet_default_mean_and_std - mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN - std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD - common_transform = Compose([ - # transforms.ColorJitter(0.4, 0.4, 0.4), - # transforms.RandomHorizontalFlip(p=0.5), - RandomResizedCropAndInterpolationWithTwoPic( - size=data_args.input_size, interpolation=data_args.train_interpolation), - ]) - - patch_transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize( - mean=torch.tensor(mean), - std=torch.tensor(std)) - ]) - - # Tokenize all texts and align the labels with them. - def tokenize_and_align_labels(examples, augmentation=False): - tokenized_inputs = tokenizer( - examples[text_column_name], - padding=False, - truncation=True, - return_overflowing_tokens=True, - boxes=examples[boxes_column_name], - ) - - labels = [] - bboxes = [] - images = [] - for batch_index in range(len(tokenized_inputs["input_ids"])): - word_ids = tokenized_inputs.word_ids(batch_index=batch_index) - org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index] - - label = examples[label_column_name][org_batch_index] - bbox = examples["bboxes"][org_batch_index] - previous_word_idx = None - label_ids = [] - bbox_inputs = [] - for word_idx in word_ids: - # Special tokens have a word id that is None. We set the label to -100 so they are automatically - # ignored in the loss function. - if word_idx is None: - label_ids.append(-100) - bbox_inputs.append([0, 0, 0, 0]) - # We set the label for the first token of each word. - elif word_idx != previous_word_idx: - label_ids.append(label_to_id[label[word_idx]]) - bbox_inputs.append(bbox[word_idx]) - # For the other tokens in a word, we set the label to either the current label or -100, depending on - # the label_all_tokens flag. 
- else: - label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) - bbox_inputs.append(bbox[word_idx]) - previous_word_idx = word_idx - labels.append(label_ids) - bboxes.append(bbox_inputs) - - if data_args.visual_embed: - ipath = examples["image_path"][org_batch_index] - img = pil_loader(ipath) - for_patches, _ = common_transform(img, augmentation=augmentation) - patch = patch_transform(for_patches) - images.append(patch) - - tokenized_inputs["labels"] = labels - tokenized_inputs["bbox"] = bboxes - if data_args.visual_embed: - tokenized_inputs["images"] = images - - return tokenized_inputs - - validation_name = "test" - if validation_name not in datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_dataset = datasets[validation_name] - if data_args.max_val_samples is not None: - eval_dataset = eval_dataset.select(range(data_args.max_val_samples)) - eval_dataset = eval_dataset.map( - tokenize_and_align_labels, - batched=True, - remove_columns=remove_columns, - num_proc=data_args.preprocessing_num_workers, - load_from_cache_file=not data_args.overwrite_cache, - ) - - # Metrics - metric = load_metric("seqeval") - - def compute_metrics(p): - predictions, labels = p - predictions = np.argmax(predictions, axis=2) - - # Remove ignored index (special tokens) - true_predictions = [ - [label_list[p] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - true_labels = [ - [label_list[l] for (p, l) in zip(prediction, label) if l != -100] - for prediction, label in zip(predictions, labels) - ] - - results = metric.compute(predictions=true_predictions, references=true_labels) - if data_args.return_entity_level_metrics: - # Unpack nested dictionaries - final_results = {} - for key, value in results.items(): - if isinstance(value, dict): - for n, v in value.items(): - final_results[f"{key}_{n}"] = v - else: - final_results[key] = value - return final_results - else: - return { - "precision": results["overall_precision"], - "recall": results["overall_recall"], - "f1": results["overall_f1"], - "accuracy": results["overall_accuracy"], - } - - # Evaluation - from model import ORTModel - - def eval_func(model): - logger.info("*** Evaluate ***") - ort_model = ORTModel( - model, - compute_metrics=compute_metrics, - ) - outputs = ort_model.evaluation_loop(eval_dataset) - return outputs.metrics['f1'] - - - if model_args.tune: - onnx_model = onnx.load(model_args.input_model) - from neural_compressor import quantization, PostTrainingQuantConfig - from neural_compressor.utils.constant import FP32 - - calib_dataset = IncDataset(eval_dataset, onnx_model) - fp32_op_names = ['/layoutlmv3/encoder/layer.\d+/output/dense/MatMul', - '/layoutlmv3/encoder/layer.\d+/intermediate/dense/MatMul',] - config = PostTrainingQuantConfig(approach='static', - quant_level=1, - quant_format=model_args.quant_format, - op_name_dict={op_name:FP32 for op_name in fp32_op_names}) - q_model = quantization.fit(onnx_model, - config, - eval_func=eval_func, - calib_dataloader=DataLoader(framework='onnxruntime', - dataset=calib_dataset, - batch_size=1)) - q_model.save(model_args.save_path) - - if model_args.benchmark: - onnx_model = onnx.load(model_args.input_model) - if model_args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - b_dataset = IncDataset(eval_dataset, onnx_model) - conf = BenchmarkConfig(iteration=100, - cores_per_instance=28, - num_of_instance=1,) - 
b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) - fit(onnx_model, conf, b_dataloader=b_dataloader) - elif model_args.mode == 'accuracy': - eval_f1 = eval_func(onnx_model) - print("Batch size = %d" % model_args.batch_size) - print("Accuracy: %.5f" % eval_f1) - -if __name__ == "__main__": - main() +#!/usr/bin/env python +# coding=utf-8 +import logging +import os +import sys +from dataclasses import dataclass, field +from typing import Optional + +import numpy as np +from datasets import ClassLabel, load_dataset, load_metric + +import transformers +from transformers import ( + AutoConfig, + AutoModelForTokenClassification, + AutoTokenizer, + HfArgumentParser, + PreTrainedTokenizerFast, + Trainer, + TrainingArguments, + set_seed, +) +from transformers.trainer_utils import get_last_checkpoint, is_main_process +from transformers.utils import check_min_version + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.5.0") + +from utils import RandomResizedCropAndInterpolationWithTwoPic, pil_loader, Compose + +from timm.data.constants import \ + IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD, IMAGENET_INCEPTION_MEAN, IMAGENET_INCEPTION_STD +from torchvision import transforms +import torch +import onnxruntime +import onnx + +from neural_compressor.data import DataLoader + +import pyarrow_hotfix; pyarrow_hotfix.uninstall() + +logger = logging.getLogger(__name__) + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where do you want to store the pretrained models downloaded from huggingface.co"}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + input_model: str = field( + default=None, + metadata={"help": "Path to onnx model"} + ) + tune: bool = field( + default=False, + metadata={"help": ("INC tune")}, + ) + benchmark: bool = field( + default=False, + metadata={"help": ("INC benchmark")}, + ) + mode: str = field( + default='performance', + metadata={"help": ("INC benchmark mode")}, + ) + save_path: str = field( + default=None, + metadata={"help": ("onnx int8 model path")}, + ) + batch_size: int = field( + default=1, + metadata={"help": ("batch size for benchmark")}, + ) + quant_format: str = field( + default="QOperator", + metadata={"help": ("quant format")}, + ) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + task_name: Optional[str] = field(default="ner", metadata={"help": "The name of the task (ner, pos...)."}) + dataset_name: Optional[str] = field( + default='funsd', metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field( + default=None, metadata={"help": "The input training data file (a csv or JSON file)."} + ) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate on (a csv or JSON file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to predict on (a csv or JSON file)."}, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to model maximum sentence length. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch. More " + "efficient on GPU but very bad for TPU." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_val_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of validation examples to this " + "value if set." + }, + ) + max_test_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + label_all_tokens: bool = field( + default=False, + metadata={ + "help": "Whether to put the label for one word on all tokens of generated by that word or just on the " + "one (in which case the other tokens will have a padding index)." 
+ }, + ) + return_entity_level_metrics: bool = field( + default=False, + metadata={"help": "Whether to return all the entity levels during evaluation or just the overall ones."}, + ) + segment_level_layout: bool = field(default=True) + visual_embed: bool = field(default=True) + data_dir: Optional[str] = field(default=None) + input_size: int = field(default=224, metadata={"help": "images input size for backbone"}) + second_input_size: int = field(default=112, metadata={"help": "images input size for discrete vae"}) + train_interpolation: str = field( + default='bicubic', metadata={"help": "Training interpolation (random, bilinear, bicubic)"}) + second_interpolation: str = field( + default='lanczos', metadata={"help": "Interpolation for discrete vae (random, bilinear, bicubic)"}) + imagenet_default_mean_and_std: bool = field(default=False, metadata={"help": ""}) + + +class IncDataset(): + def __init__(self, + dataset, + model, + label_names=None,): + self.dataset = dataset + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + self._process_dataset() + + def _process_dataset(self): + self.label = [] + self.onnx_inputs = [] + for inputs in self.dataset: + onnx_inputs = [] + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + self.label.append(labels) + for key in self.onnx_input_names: + if key in inputs: + # onnx_inputs[key] = np.array([inputs[key]]) + onnx_inputs.append(np.array(inputs[key])) + # self.onnx_inputs.append([np.array(inputs[key])]) + elif key == 'pixel_values': + # onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) + onnx_inputs.append(np.array(inputs['images'], dtype=np.float32)) + # self.onnx_inputs.append([np.array(inputs['images'], dtype=np.float32)]) + + self.onnx_inputs.append(onnx_inputs) + onnx_inputs =[] + + def __getitem__(self, index): + return tuple(self.onnx_inputs[index]), self.label[index] + + def __len__(self): + assert len(self.label) == len(self.onnx_inputs) + return len(self.onnx_inputs) + +def main(): + # See all possible arguments in layoutlmft/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. 
+ model_args, data_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + # Set the verbosity to info of the Transformers logger (on main process only): + if is_main_process(training_args.local_rank): + transformers.utils.logging.set_verbosity_info() + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + logger.info(f"Training/evaluation parameters {training_args}") + + # Set seed before initializing model. + set_seed(training_args.seed) + + if data_args.dataset_name == 'funsd': + # datasets = load_dataset("nielsr/funsd") + import funsd + datasets = load_dataset(os.path.abspath(funsd.__file__), cache_dir=model_args.cache_dir) + else: + raise NotImplementedError() + + column_names = datasets["test"].column_names + features = datasets["test"].features + + text_column_name = "words" if "words" in column_names else "tokens" + boxes_column_name = "bboxes" + + label_column_name = ( + f"{data_args.task_name}_tags" if f"{data_args.task_name}_tags" in column_names else column_names[1] + ) + + remove_columns = column_names + + # In the event the labels are not a `Sequence[ClassLabel]`, we will need to go through the dataset to get the + # unique labels. + def get_label_list(labels): + unique_labels = set() + for label in labels: + unique_labels = unique_labels | set(label) + label_list = list(unique_labels) + label_list.sort() + return label_list + + if isinstance(features[label_column_name].feature, ClassLabel): + label_list = features[label_column_name].feature.names + # No need to convert the labels since they are already ints. + label_to_id = {i: i for i in range(len(label_list))} + else: + label_list = get_label_list(datasets["train"][label_column_name]) + label_to_id = {l: i for i, l in enumerate(label_list)} + num_labels = len(label_list) + + # Load pretrained model and tokenizer + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. + config = AutoConfig.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + num_labels=num_labels, + finetuning_task=data_args.task_name, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + input_size=data_args.input_size, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + tokenizer_file=None, # avoid loading from a cached file of the pre-trained model in another machine + cache_dir=model_args.cache_dir, + use_fast=True, + add_prefix_space=True, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # Tokenizer check: this script requires a fast tokenizer. 
+    if not isinstance(tokenizer, PreTrainedTokenizerFast):
+        raise ValueError(
+            "This example script only works for models that have a fast tokenizer. Check out the big table of models "
+            "at https://huggingface.co/transformers/index.html#bigtable to find the model types that meet this "
+            "requirement"
+        )
+
+    # Preprocessing the dataset
+    # Padding strategy
+    padding = "max_length" if data_args.pad_to_max_length else False
+
+    if data_args.visual_embed:
+        imagenet_default_mean_and_std = data_args.imagenet_default_mean_and_std
+        mean = IMAGENET_INCEPTION_MEAN if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_MEAN
+        std = IMAGENET_INCEPTION_STD if not imagenet_default_mean_and_std else IMAGENET_DEFAULT_STD
+        common_transform = Compose([
+            # transforms.ColorJitter(0.4, 0.4, 0.4),
+            # transforms.RandomHorizontalFlip(p=0.5),
+            RandomResizedCropAndInterpolationWithTwoPic(
+                size=data_args.input_size, interpolation=data_args.train_interpolation),
+        ])
+
+        patch_transform = transforms.Compose([
+            transforms.ToTensor(),
+            transforms.Normalize(
+                mean=torch.tensor(mean),
+                std=torch.tensor(std))
+        ])
+
+    # Tokenize all texts and align the labels with them.
+    def tokenize_and_align_labels(examples, augmentation=False):
+        tokenized_inputs = tokenizer(
+            examples[text_column_name],
+            padding=False,
+            truncation=True,
+            return_overflowing_tokens=True,
+            boxes=examples[boxes_column_name],
+        )
+
+        labels = []
+        bboxes = []
+        images = []
+        for batch_index in range(len(tokenized_inputs["input_ids"])):
+            word_ids = tokenized_inputs.word_ids(batch_index=batch_index)
+            org_batch_index = tokenized_inputs["overflow_to_sample_mapping"][batch_index]
+
+            label = examples[label_column_name][org_batch_index]
+            bbox = examples["bboxes"][org_batch_index]
+            previous_word_idx = None
+            label_ids = []
+            bbox_inputs = []
+            for word_idx in word_ids:
+                # Special tokens have a word id that is None. We set the label to -100 so they are automatically
+                # ignored in the loss function.
+                if word_idx is None:
+                    label_ids.append(-100)
+                    bbox_inputs.append([0, 0, 0, 0])
+                # We set the label for the first token of each word.
+                elif word_idx != previous_word_idx:
+                    label_ids.append(label_to_id[label[word_idx]])
+                    bbox_inputs.append(bbox[word_idx])
+                # For the other tokens in a word, we set the label to either the current label or -100, depending on
+                # the label_all_tokens flag.
+ else: + label_ids.append(label_to_id[label[word_idx]] if data_args.label_all_tokens else -100) + bbox_inputs.append(bbox[word_idx]) + previous_word_idx = word_idx + labels.append(label_ids) + bboxes.append(bbox_inputs) + + if data_args.visual_embed: + ipath = examples["image_path"][org_batch_index] + img = pil_loader(ipath) + for_patches, _ = common_transform(img, augmentation=augmentation) + patch = patch_transform(for_patches) + images.append(patch) + + tokenized_inputs["labels"] = labels + tokenized_inputs["bbox"] = bboxes + if data_args.visual_embed: + tokenized_inputs["images"] = images + + return tokenized_inputs + + validation_name = "test" + if validation_name not in datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_dataset = datasets[validation_name] + if data_args.max_val_samples is not None: + eval_dataset = eval_dataset.select(range(data_args.max_val_samples)) + eval_dataset = eval_dataset.map( + tokenize_and_align_labels, + batched=True, + remove_columns=remove_columns, + num_proc=data_args.preprocessing_num_workers, + load_from_cache_file=not data_args.overwrite_cache, + ) + + # Metrics + metric = load_metric("seqeval") + + def compute_metrics(p): + predictions, labels = p + predictions = np.argmax(predictions, axis=2) + + # Remove ignored index (special tokens) + true_predictions = [ + [label_list[p] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + true_labels = [ + [label_list[l] for (p, l) in zip(prediction, label) if l != -100] + for prediction, label in zip(predictions, labels) + ] + + results = metric.compute(predictions=true_predictions, references=true_labels) + if data_args.return_entity_level_metrics: + # Unpack nested dictionaries + final_results = {} + for key, value in results.items(): + if isinstance(value, dict): + for n, v in value.items(): + final_results[f"{key}_{n}"] = v + else: + final_results[key] = value + return final_results + else: + return { + "precision": results["overall_precision"], + "recall": results["overall_recall"], + "f1": results["overall_f1"], + "accuracy": results["overall_accuracy"], + } + + # Evaluation + from model import ORTModel + + def eval_func(model): + logger.info("*** Evaluate ***") + ort_model = ORTModel( + model, + compute_metrics=compute_metrics, + ) + outputs = ort_model.evaluation_loop(eval_dataset) + return outputs.metrics['f1'] + + + if model_args.tune: + onnx_model = onnx.load(model_args.input_model) + from neural_compressor import quantization, PostTrainingQuantConfig + from neural_compressor.utils.constant import FP32 + + calib_dataset = IncDataset(eval_dataset, onnx_model) + fp32_op_names = ['/layoutlmv3/encoder/layer.\d+/output/dense/MatMul', + '/layoutlmv3/encoder/layer.\d+/intermediate/dense/MatMul',] + config = PostTrainingQuantConfig(approach='static', + quant_level=1, + quant_format=model_args.quant_format, + op_name_dict={op_name:FP32 for op_name in fp32_op_names}) + q_model = quantization.fit(onnx_model, + config, + eval_func=eval_func, + calib_dataloader=DataLoader(framework='onnxruntime', + dataset=calib_dataset, + batch_size=1)) + q_model.save(model_args.save_path) + + if model_args.benchmark: + onnx_model = onnx.load(model_args.input_model) + if model_args.mode == 'performance': + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + b_dataset = IncDataset(eval_dataset, onnx_model) + conf = BenchmarkConfig(iteration=100, + cores_per_instance=28, + num_of_instance=1,) + 
b_dataloader = DataLoader(framework='onnxruntime', dataset=b_dataset, batch_size=model_args.batch_size) + fit(onnx_model, conf, b_dataloader=b_dataloader) + elif model_args.mode == 'accuracy': + eval_f1 = eval_func(onnx_model) + print("Batch size = %d" % model_args.batch_size) + print("Accuracy: %.5f" % eval_f1) + +if __name__ == "__main__": + main() diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/model.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/model.py index 6f0845c6843..004a9f17b66 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/model.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/model.py @@ -1,88 +1,88 @@ -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import os -from typing import Callable, Dict, List, Optional, Union - -import numpy as np -from datasets import Dataset -from transformers import EvalPrediction -from transformers.trainer_pt_utils import nested_concat -from transformers.trainer_utils import EvalLoopOutput -import onnxruntime - -logger = logging.getLogger(__name__) - -class ORTModel: - def __init__( - self, - model: Union[str, os.PathLike], - compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, - label_names: Optional[List[str]] = None, - ): - """ - Args: - model_path (`Union[str, os.PathLike]`): - The path to the model ONNX Intermediate Representation (IR). - execution_provider (:obj:`str`, `optional`): - ONNX Runtime execution provider to use. - compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): - The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and - return a dictionary string to metric values. - label_names (`List[str]`, `optional`): - The list of keys in your dictionary of inputs that correspond to the labels. - """ - self.compute_metrics = compute_metrics - self.label_names = ["labels"] if label_names is None else label_names - self.session = onnxruntime.InferenceSession(model.SerializeToString(), - providers=onnxruntime.get_available_providers()) - self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - - def evaluation_loop(self, dataset: Dataset): - """ - Run evaluation and returns metrics and predictions. - - Args: - dataset (`datasets.Dataset`): - Dataset to use for the evaluation step. 
- """ - logger.info("***** Running evaluation *****") - all_preds = None - all_labels = None - onnx_inputs = {} - for step, inputs in enumerate(dataset): - has_labels = all(inputs.get(k) is not None for k in self.label_names) - if has_labels: - labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) - if len(labels) == 1: - labels = labels[0] - else: - labels = None - - for key in self.onnx_input_names: - if key in inputs: - onnx_inputs[key] = np.array([inputs[key]]) - elif key == 'pixel_values': - onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) - preds = self.session.run(None, onnx_inputs) - if len(preds) == 1: - preds = preds[0] - all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) - all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) - if self.compute_metrics is not None and all_preds is not None and all_labels is not None: - metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) - else: - metrics = {} - return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset)) +# Copyright 2021 The HuggingFace Team. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import os +from typing import Callable, Dict, List, Optional, Union + +import numpy as np +from datasets import Dataset +from transformers import EvalPrediction +from transformers.trainer_pt_utils import nested_concat +from transformers.trainer_utils import EvalLoopOutput +import onnxruntime + +logger = logging.getLogger(__name__) + +class ORTModel: + def __init__( + self, + model: Union[str, os.PathLike], + compute_metrics: Optional[Callable[[EvalPrediction], Dict]] = None, + label_names: Optional[List[str]] = None, + ): + """ + Args: + model_path (`Union[str, os.PathLike]`): + The path to the model ONNX Intermediate Representation (IR). + execution_provider (:obj:`str`, `optional`): + ONNX Runtime execution provider to use. + compute_metrics (`Callable[[EvalPrediction], Dict]`, `optional`): + The function that will be used to compute metrics at evaluation. Must take an `EvalPrediction` and + return a dictionary string to metric values. + label_names (`List[str]`, `optional`): + The list of keys in your dictionary of inputs that correspond to the labels. + """ + self.compute_metrics = compute_metrics + self.label_names = ["labels"] if label_names is None else label_names + self.session = onnxruntime.InferenceSession(model.SerializeToString(), + providers=onnxruntime.get_available_providers()) + self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} + + def evaluation_loop(self, dataset: Dataset): + """ + Run evaluation and returns metrics and predictions. + + Args: + dataset (`datasets.Dataset`): + Dataset to use for the evaluation step. 
+ """ + logger.info("***** Running evaluation *****") + all_preds = None + all_labels = None + onnx_inputs = {} + for step, inputs in enumerate(dataset): + has_labels = all(inputs.get(k) is not None for k in self.label_names) + if has_labels: + labels = tuple(np.array([inputs.get(name)]) for name in self.label_names) + if len(labels) == 1: + labels = labels[0] + else: + labels = None + + for key in self.onnx_input_names: + if key in inputs: + onnx_inputs[key] = np.array([inputs[key]]) + elif key == 'pixel_values': + onnx_inputs[key] = np.array([inputs['images']], dtype=np.float32) + preds = self.session.run(None, onnx_inputs) + if len(preds) == 1: + preds = preds[0] + all_preds = preds if all_preds is None else nested_concat(all_preds, preds, padding_index=-100) + all_labels = labels if all_labels is None else nested_concat(all_labels, labels, padding_index=-100) + if self.compute_metrics is not None and all_preds is not None and all_labels is not None: + metrics = self.compute_metrics(EvalPrediction(predictions=all_preds, label_ids=all_labels)) + else: + metrics = {} + return EvalLoopOutput(predictions=all_preds, label_ids=all_labels, metrics=metrics, num_samples=len(dataset)) diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt similarity index 92% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt index 80ac2e1ca53..3b0a527da51 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/requirements.txt @@ -1,19 +1,19 @@ -accelerate -datasets -transformers -seqeval -tensorboard -sentencepiece -timm -fvcore -Pillow -einops -textdistance -shapely -protobuf -torch -torchvision -setuptools -onnx -onnxruntime +accelerate +datasets +transformers +seqeval +tensorboard +sentencepiece +timm +fvcore +Pillow +einops +textdistance +shapely +protobuf +torch +torchvision +setuptools +onnx +onnxruntime onnxruntime-extensions; python_version < '3.11' \ No newline at end of file diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/run_benchmark.sh diff --git 
a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/utils.py b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/utils.py similarity index 97% rename from examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/utils.py rename to examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/utils.py index c8c36be4931..4635c541873 100644 --- a/examples/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/utils.py +++ b/examples/deprecated/onnxrt/nlp/huggingface_model/token_classification/layoutlmv3/quantization/ptq_static/utils.py @@ -1,462 +1,462 @@ -import torchvision.transforms.functional as F -import warnings -import math -import random -import numpy as np -from PIL import Image -import torch - -from fvcore.transforms.transform import TransformList, Transform - -from iopath.common.file_io import PathManager as PathManagerBase -PathManager = PathManagerBase() - -_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] - -# https://www.exiv2.org/tags.html -_EXIF_ORIENT = 274 # exif 'Orientation' tag - -def _apply_exif_orientation(image): - """ - Applies the exif orientation correctly. - - This code exists per the bug: - https://github.com/python-pillow/Pillow/issues/3973 - with the function `ImageOps.exif_transpose`. The Pillow source raises errors with - various methods, especially `tobytes` - - Function based on: - https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 - https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 - - Args: - image (PIL.Image): a PIL image - - Returns: - (PIL.Image): the PIL image with exif orientation applied, if applicable - """ - if not hasattr(image, "getexif"): - return image - - try: - exif = image.getexif() - except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 - exif = None - - if exif is None: - return image - - orientation = exif.get(_EXIF_ORIENT) - - method = { - 2: Image.FLIP_LEFT_RIGHT, - 3: Image.ROTATE_180, - 4: Image.FLIP_TOP_BOTTOM, - 5: Image.TRANSPOSE, - 6: Image.ROTATE_270, - 7: Image.TRANSVERSE, - 8: Image.ROTATE_90, - }.get(orientation) - - if method is not None: - return image.transpose(method) - return image - - -def convert_PIL_to_numpy(image, format): - """ - Convert PIL image to numpy array of target format. 
- - Args: - image (PIL.Image): a PIL image - format (str): the format of output image - - Returns: - (np.ndarray): also see `read_image` - """ - if format is not None: - # PIL only supports RGB, so convert to RGB and flip channels over below - conversion_format = format - if format in ["BGR", "YUV-BT.601"]: - conversion_format = "RGB" - image = image.convert(conversion_format) - image = np.asarray(image) - # PIL squeezes out the channel dimension for "L", so make it HWC - if format == "L": - image = np.expand_dims(image, -1) - - # handle formats not supported by PIL - elif format == "BGR": - # flip channels if needed - image = image[:, :, ::-1] - elif format == "YUV-BT.601": - image = image / 255.0 - image = np.dot(image, np.array(_M_RGB2YUV).T) - - return image - - -def read_image(file_name, format=None): - """ - Read an image into the given format. - Will apply rotation and flipping if the image has such exif information. - - Args: - file_name (str): image file path - format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". - - Returns: - image (np.ndarray): - an HWC image in the given format, which is 0-255, uint8 for - supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. - """ - with PathManager.open(file_name, "rb") as f: - image = Image.open(f) - - # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 - image = _apply_exif_orientation(image) - return convert_PIL_to_numpy(image, format) - - -class ResizeTransform(Transform): - """ - Resize the image to a target size. - """ - - def __init__(self, h, w, new_h, new_w, interp=None): - """ - Args: - h, w (int): original image size - new_h, new_w (int): new image size - interp: PIL interpolation methods, defaults to bilinear. - """ - # TODO decide on PIL vs opencv - super().__init__() - if interp is None: - interp = Image.BILINEAR - self._set_attributes(locals()) - - def apply_image(self, img, interp=None): - assert img.shape[:2] == (self.h, self.w) - assert len(img.shape) <= 4 - interp_method = interp if interp is not None else self.interp - - if img.dtype == np.uint8: - if len(img.shape) > 2 and img.shape[2] == 1: - pil_image = Image.fromarray(img[:, :, 0], mode="L") - else: - pil_image = Image.fromarray(img) - pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) - ret = np.asarray(pil_image) - if len(img.shape) > 2 and img.shape[2] == 1: - ret = np.expand_dims(ret, -1) - else: - # PIL only supports uint8 - if any(x < 0 for x in img.strides): - img = np.ascontiguousarray(img) - img = torch.from_numpy(img) - shape = list(img.shape) - shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] - img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw - _PIL_RESIZE_TO_INTERPOLATE_MODE = { - Image.NEAREST: "nearest", - Image.BILINEAR: "bilinear", - Image.BICUBIC: "bicubic", - } - mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] - align_corners = None if mode == "nearest" else False - img = torch.nn.functional.interpolate( - img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners - ) - shape[:2] = (self.new_h, self.new_w) - ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) - - return ret - - def apply_coords(self, coords): - coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) - coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) - return coords - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - def inverse(self): - return 
ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) - - -def normalize_bbox(bbox, size): - return [ - int(1000 * bbox[0] / size[0]), - int(1000 * bbox[1] / size[1]), - int(1000 * bbox[2] / size[0]), - int(1000 * bbox[3] / size[1]), - ] - - -def load_image(image_path): - image = read_image(image_path, format="BGR") - h = image.shape[0] - w = image.shape[1] - img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) - image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable - return image, (w, h) - - -def crop(image, i, j, h, w, boxes=None): - cropped_image = F.crop(image, i, j, h, w) - - if boxes is not None: - # Currently we cannot use this case since when some boxes is out of the cropped image, - # it may be better to drop out these boxes along with their text input (instead of min or clamp) - # which haven't been implemented here - max_size = torch.as_tensor([w, h], dtype=torch.float32) - cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) - cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) - cropped_boxes = cropped_boxes.clamp(min=0) - boxes = cropped_boxes.reshape(-1, 4) - - return cropped_image, boxes - - -def resize(image, size, interpolation, boxes=None): - # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, - # which is compatible with a square image size of 224x224 - rescaled_image = F.resize(image, size, interpolation) - - if boxes is None: - return rescaled_image, None - - ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) - ratio_width, ratio_height = ratios - - # boxes = boxes.copy() - scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) - - return rescaled_image, scaled_boxes - - -def clamp(num, min_value, max_value): - return max(min(num, max_value), min_value) - - -def get_bb(bb, page_size): - bbs = [float(j) for j in bb] - xs, ys = [], [] - for i, b in enumerate(bbs): - if i % 2 == 0: - xs.append(b) - else: - ys.append(b) - (width, height) = page_size - return_bb = [ - clamp(min(xs), 0, width - 1), - clamp(min(ys), 0, height - 1), - clamp(max(xs), 0, width - 1), - clamp(max(ys), 0, height - 1), - ] - return_bb = [ - int(1000 * return_bb[0] / width), - int(1000 * return_bb[1] / height), - int(1000 * return_bb[2] / width), - int(1000 * return_bb[3] / height), - ] - return return_bb - - -class ToNumpy: - - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return np_img - - -class ToTensor: - - def __init__(self, dtype=torch.float32): - self.dtype = dtype - - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return torch.from_numpy(np_img).to(dtype=self.dtype) - - -_pil_interpolation_to_str = { - F.InterpolationMode.NEAREST: 'F.InterpolationMode.NEAREST', - F.InterpolationMode.BILINEAR: 'F.InterpolationMode.BILINEAR', - F.InterpolationMode.BICUBIC: 'F.InterpolationMode.BICUBIC', - F.InterpolationMode.LANCZOS: 'F.InterpolationMode.LANCZOS', - F.InterpolationMode.HAMMING: 'F.InterpolationMode.HAMMING', - F.InterpolationMode.BOX: 'F.InterpolationMode.BOX', -} - - -def _pil_interp(method): - if method == 'bicubic': - return F.InterpolationMode.BICUBIC - 
elif method == 'lanczos': - return F.InterpolationMode.LANCZOS - elif method == 'hamming': - return F.InterpolationMode.HAMMING - else: - # default bilinear, do we want to allow nearest? - return F.InterpolationMode.BILINEAR - - -class Compose: - """Composes several transforms together. This transform does not support torchscript. - Please, see the note below. - - Args: - transforms (list of ``Transform`` objects): list of transforms to compose. - - Example: - >>> transforms.Compose([ - >>> transforms.CenterCrop(10), - >>> transforms.PILToTensor(), - >>> transforms.ConvertImageDtype(torch.float), - >>> ]) - - .. note:: - In order to script the transformations, please use ``torch.nn.Sequential`` as below. - - >>> transforms = torch.nn.Sequential( - >>> transforms.CenterCrop(10), - >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - >>> ) - >>> scripted_transforms = torch.jit.script(transforms) - - Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require - `lambda` functions or ``PIL.Image``. - - """ - - def __init__(self, transforms): - self.transforms = transforms - - def __call__(self, img, augmentation=False, box=None): - for t in self.transforms: - img = t(img, augmentation, box) - return img - - -class RandomResizedCropAndInterpolationWithTwoPic: - """Crop the given PIL Image to random size and aspect ratio with random interpolation. - A crop of random size (default: of 0.08 to 1.0) of the original size and a random - aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop - is finally resized to given size. - This is popularly used to train the Inception networks. - Args: - size: expected output size of each edge - scale: range of size of the origin size cropped - ratio: range of aspect ratio of the origin aspect ratio cropped - interpolation: Default: PIL.Image.BILINEAR - """ - - def __init__(self, size, second_size=None, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation='bilinear', second_interpolation='lanczos'): - if isinstance(size, tuple): - self.size = size - else: - self.size = (size, size) - if second_size is not None: - if isinstance(second_size, tuple): - self.second_size = second_size - else: - self.second_size = (second_size, second_size) - else: - self.second_size = None - if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): - warnings.warn("range should be of kind (min, max)") - - self.interpolation = _pil_interp(interpolation) - self.second_interpolation = _pil_interp(second_interpolation) - self.scale = scale - self.ratio = ratio - - @staticmethod - def get_params(img, scale, ratio): - """Get parameters for ``crop`` for a random sized crop. - Args: - img (PIL Image): Image to be cropped. - scale (tuple): range of size of the origin size cropped - ratio (tuple): range of aspect ratio of the origin aspect ratio cropped - Returns: - tuple: params (i, j, h, w) to be passed to ``crop`` for a random - sized crop. 
- """ - area = img.size[0] * img.size[1] - - for attempt in range(10): - target_area = random.uniform(*scale) * area - log_ratio = (math.log(ratio[0]), math.log(ratio[1])) - aspect_ratio = math.exp(random.uniform(*log_ratio)) - - w = int(round(math.sqrt(target_area * aspect_ratio))) - h = int(round(math.sqrt(target_area / aspect_ratio))) - - if w <= img.size[0] and h <= img.size[1]: - i = random.randint(0, img.size[1] - h) - j = random.randint(0, img.size[0] - w) - return i, j, h, w - - # Fallback to central crop - in_ratio = img.size[0] / img.size[1] - if in_ratio < min(ratio): - w = img.size[0] - h = int(round(w / min(ratio))) - elif in_ratio > max(ratio): - h = img.size[1] - w = int(round(h * max(ratio))) - else: # whole image - w = img.size[0] - h = img.size[1] - i = (img.size[1] - h) // 2 - j = (img.size[0] - w) // 2 - return i, j, h, w - - def __call__(self, img, augmentation=False, box=None): - """ - Args: - img (PIL Image): Image to be cropped and resized. - Returns: - PIL Image: Randomly cropped and resized image. - """ - if augmentation: - i, j, h, w = self.get_params(img, self.scale, self.ratio) - img = F.crop(img, i, j, h, w) - # img, box = crop(img, i, j, h, w, box) - img = F.resize(img, self.size, self.interpolation) - second_img = F.resize(img, self.second_size, self.second_interpolation) \ - if self.second_size is not None else None - return img, second_img - - def __repr__(self): - if isinstance(self.interpolation, (tuple, list)): - interpolate_str = ' '.join([_pil_interpolation_to_str[x] for x in self.interpolation]) - else: - interpolate_str = _pil_interpolation_to_str[self.interpolation] - format_string = self.__class__.__name__ + '(size={0}'.format(self.size) - format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale)) - format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio)) - format_string += ', interpolation={0}'.format(interpolate_str) - if self.second_size is not None: - format_string += ', second_size={0}'.format(self.second_size) - format_string += ', second_interpolation={0}'.format(_pil_interpolation_to_str[self.second_interpolation]) - format_string += ')' - return format_string - - -def pil_loader(path: str) -> Image.Image: - # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') - +import torchvision.transforms.functional as F +import warnings +import math +import random +import numpy as np +from PIL import Image +import torch + +from fvcore.transforms.transform import TransformList, Transform + +from iopath.common.file_io import PathManager as PathManagerBase +PathManager = PathManagerBase() + +_M_RGB2YUV = [[0.299, 0.587, 0.114], [-0.14713, -0.28886, 0.436], [0.615, -0.51499, -0.10001]] + +# https://www.exiv2.org/tags.html +_EXIF_ORIENT = 274 # exif 'Orientation' tag + +def _apply_exif_orientation(image): + """ + Applies the exif orientation correctly. + + This code exists per the bug: + https://github.com/python-pillow/Pillow/issues/3973 + with the function `ImageOps.exif_transpose`. 
The Pillow source raises errors with + various methods, especially `tobytes` + + Function based on: + https://github.com/wkentaro/labelme/blob/v4.5.4/labelme/utils/image.py#L59 + https://github.com/python-pillow/Pillow/blob/7.1.2/src/PIL/ImageOps.py#L527 + + Args: + image (PIL.Image): a PIL image + + Returns: + (PIL.Image): the PIL image with exif orientation applied, if applicable + """ + if not hasattr(image, "getexif"): + return image + + try: + exif = image.getexif() + except Exception: # https://github.com/facebookresearch/detectron2/issues/1885 + exif = None + + if exif is None: + return image + + orientation = exif.get(_EXIF_ORIENT) + + method = { + 2: Image.FLIP_LEFT_RIGHT, + 3: Image.ROTATE_180, + 4: Image.FLIP_TOP_BOTTOM, + 5: Image.TRANSPOSE, + 6: Image.ROTATE_270, + 7: Image.TRANSVERSE, + 8: Image.ROTATE_90, + }.get(orientation) + + if method is not None: + return image.transpose(method) + return image + + +def convert_PIL_to_numpy(image, format): + """ + Convert PIL image to numpy array of target format. + + Args: + image (PIL.Image): a PIL image + format (str): the format of output image + + Returns: + (np.ndarray): also see `read_image` + """ + if format is not None: + # PIL only supports RGB, so convert to RGB and flip channels over below + conversion_format = format + if format in ["BGR", "YUV-BT.601"]: + conversion_format = "RGB" + image = image.convert(conversion_format) + image = np.asarray(image) + # PIL squeezes out the channel dimension for "L", so make it HWC + if format == "L": + image = np.expand_dims(image, -1) + + # handle formats not supported by PIL + elif format == "BGR": + # flip channels if needed + image = image[:, :, ::-1] + elif format == "YUV-BT.601": + image = image / 255.0 + image = np.dot(image, np.array(_M_RGB2YUV).T) + + return image + + +def read_image(file_name, format=None): + """ + Read an image into the given format. + Will apply rotation and flipping if the image has such exif information. + + Args: + file_name (str): image file path + format (str): one of the supported image modes in PIL, or "BGR" or "YUV-BT.601". + + Returns: + image (np.ndarray): + an HWC image in the given format, which is 0-255, uint8 for + supported image modes in PIL or "BGR"; float (0-1 for Y) for YUV-BT.601. + """ + with PathManager.open(file_name, "rb") as f: + image = Image.open(f) + + # work around this bug: https://github.com/python-pillow/Pillow/issues/3973 + image = _apply_exif_orientation(image) + return convert_PIL_to_numpy(image, format) + + +class ResizeTransform(Transform): + """ + Resize the image to a target size. + """ + + def __init__(self, h, w, new_h, new_w, interp=None): + """ + Args: + h, w (int): original image size + new_h, new_w (int): new image size + interp: PIL interpolation methods, defaults to bilinear. 
+ """ + # TODO decide on PIL vs opencv + super().__init__() + if interp is None: + interp = Image.BILINEAR + self._set_attributes(locals()) + + def apply_image(self, img, interp=None): + assert img.shape[:2] == (self.h, self.w) + assert len(img.shape) <= 4 + interp_method = interp if interp is not None else self.interp + + if img.dtype == np.uint8: + if len(img.shape) > 2 and img.shape[2] == 1: + pil_image = Image.fromarray(img[:, :, 0], mode="L") + else: + pil_image = Image.fromarray(img) + pil_image = pil_image.resize((self.new_w, self.new_h), interp_method) + ret = np.asarray(pil_image) + if len(img.shape) > 2 and img.shape[2] == 1: + ret = np.expand_dims(ret, -1) + else: + # PIL only supports uint8 + if any(x < 0 for x in img.strides): + img = np.ascontiguousarray(img) + img = torch.from_numpy(img) + shape = list(img.shape) + shape_4d = shape[:2] + [1] * (4 - len(shape)) + shape[2:] + img = img.view(shape_4d).permute(2, 3, 0, 1) # hw(c) -> nchw + _PIL_RESIZE_TO_INTERPOLATE_MODE = { + Image.NEAREST: "nearest", + Image.BILINEAR: "bilinear", + Image.BICUBIC: "bicubic", + } + mode = _PIL_RESIZE_TO_INTERPOLATE_MODE[interp_method] + align_corners = None if mode == "nearest" else False + img = torch.nn.functional.interpolate( + img, (self.new_h, self.new_w), mode=mode, align_corners=align_corners + ) + shape[:2] = (self.new_h, self.new_w) + ret = img.permute(2, 3, 0, 1).view(shape).numpy() # nchw -> hw(c) + + return ret + + def apply_coords(self, coords): + coords[:, 0] = coords[:, 0] * (self.new_w * 1.0 / self.w) + coords[:, 1] = coords[:, 1] * (self.new_h * 1.0 / self.h) + return coords + + def apply_segmentation(self, segmentation): + segmentation = self.apply_image(segmentation, interp=Image.NEAREST) + return segmentation + + def inverse(self): + return ResizeTransform(self.new_h, self.new_w, self.h, self.w, self.interp) + + +def normalize_bbox(bbox, size): + return [ + int(1000 * bbox[0] / size[0]), + int(1000 * bbox[1] / size[1]), + int(1000 * bbox[2] / size[0]), + int(1000 * bbox[3] / size[1]), + ] + + +def load_image(image_path): + image = read_image(image_path, format="BGR") + h = image.shape[0] + w = image.shape[1] + img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) + image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable + return image, (w, h) + + +def crop(image, i, j, h, w, boxes=None): + cropped_image = F.crop(image, i, j, h, w) + + if boxes is not None: + # Currently we cannot use this case since when some boxes is out of the cropped image, + # it may be better to drop out these boxes along with their text input (instead of min or clamp) + # which haven't been implemented here + max_size = torch.as_tensor([w, h], dtype=torch.float32) + cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) + cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) + cropped_boxes = cropped_boxes.clamp(min=0) + boxes = cropped_boxes.reshape(-1, 4) + + return cropped_image, boxes + + +def resize(image, size, interpolation, boxes=None): + # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, + # which is compatible with a square image size of 224x224 + rescaled_image = F.resize(image, size, interpolation) + + if boxes is None: + return rescaled_image, None + + ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) + ratio_width, ratio_height = ratios + + # boxes = boxes.copy() + scaled_boxes = boxes * 
torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) + + return rescaled_image, scaled_boxes + + +def clamp(num, min_value, max_value): + return max(min(num, max_value), min_value) + + +def get_bb(bb, page_size): + bbs = [float(j) for j in bb] + xs, ys = [], [] + for i, b in enumerate(bbs): + if i % 2 == 0: + xs.append(b) + else: + ys.append(b) + (width, height) = page_size + return_bb = [ + clamp(min(xs), 0, width - 1), + clamp(min(ys), 0, height - 1), + clamp(max(xs), 0, width - 1), + clamp(max(ys), 0, height - 1), + ] + return_bb = [ + int(1000 * return_bb[0] / width), + int(1000 * return_bb[1] / height), + int(1000 * return_bb[2] / width), + int(1000 * return_bb[3] / height), + ] + return return_bb + + +class ToNumpy: + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return np_img + + +class ToTensor: + + def __init__(self, dtype=torch.float32): + self.dtype = dtype + + def __call__(self, pil_img): + np_img = np.array(pil_img, dtype=np.uint8) + if np_img.ndim < 3: + np_img = np.expand_dims(np_img, axis=-1) + np_img = np.rollaxis(np_img, 2) # HWC to CHW + return torch.from_numpy(np_img).to(dtype=self.dtype) + + +_pil_interpolation_to_str = { + F.InterpolationMode.NEAREST: 'F.InterpolationMode.NEAREST', + F.InterpolationMode.BILINEAR: 'F.InterpolationMode.BILINEAR', + F.InterpolationMode.BICUBIC: 'F.InterpolationMode.BICUBIC', + F.InterpolationMode.LANCZOS: 'F.InterpolationMode.LANCZOS', + F.InterpolationMode.HAMMING: 'F.InterpolationMode.HAMMING', + F.InterpolationMode.BOX: 'F.InterpolationMode.BOX', +} + + +def _pil_interp(method): + if method == 'bicubic': + return F.InterpolationMode.BICUBIC + elif method == 'lanczos': + return F.InterpolationMode.LANCZOS + elif method == 'hamming': + return F.InterpolationMode.HAMMING + else: + # default bilinear, do we want to allow nearest? + return F.InterpolationMode.BILINEAR + + +class Compose: + """Composes several transforms together. This transform does not support torchscript. + Please, see the note below. + + Args: + transforms (list of ``Transform`` objects): list of transforms to compose. + + Example: + >>> transforms.Compose([ + >>> transforms.CenterCrop(10), + >>> transforms.PILToTensor(), + >>> transforms.ConvertImageDtype(torch.float), + >>> ]) + + .. note:: + In order to script the transformations, please use ``torch.nn.Sequential`` as below. + + >>> transforms = torch.nn.Sequential( + >>> transforms.CenterCrop(10), + >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), + >>> ) + >>> scripted_transforms = torch.jit.script(transforms) + + Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require + `lambda` functions or ``PIL.Image``. + + """ + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img, augmentation=False, box=None): + for t in self.transforms: + img = t(img, augmentation, box) + return img + + +class RandomResizedCropAndInterpolationWithTwoPic: + """Crop the given PIL Image to random size and aspect ratio with random interpolation. + A crop of random size (default: of 0.08 to 1.0) of the original size and a random + aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop + is finally resized to given size. + This is popularly used to train the Inception networks. 
+ Args: + size: expected output size of each edge + scale: range of size of the origin size cropped + ratio: range of aspect ratio of the origin aspect ratio cropped + interpolation: Default: PIL.Image.BILINEAR + """ + + def __init__(self, size, second_size=None, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), + interpolation='bilinear', second_interpolation='lanczos'): + if isinstance(size, tuple): + self.size = size + else: + self.size = (size, size) + if second_size is not None: + if isinstance(second_size, tuple): + self.second_size = second_size + else: + self.second_size = (second_size, second_size) + else: + self.second_size = None + if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): + warnings.warn("range should be of kind (min, max)") + + self.interpolation = _pil_interp(interpolation) + self.second_interpolation = _pil_interp(second_interpolation) + self.scale = scale + self.ratio = ratio + + @staticmethod + def get_params(img, scale, ratio): + """Get parameters for ``crop`` for a random sized crop. + Args: + img (PIL Image): Image to be cropped. + scale (tuple): range of size of the origin size cropped + ratio (tuple): range of aspect ratio of the origin aspect ratio cropped + Returns: + tuple: params (i, j, h, w) to be passed to ``crop`` for a random + sized crop. + """ + area = img.size[0] * img.size[1] + + for attempt in range(10): + target_area = random.uniform(*scale) * area + log_ratio = (math.log(ratio[0]), math.log(ratio[1])) + aspect_ratio = math.exp(random.uniform(*log_ratio)) + + w = int(round(math.sqrt(target_area * aspect_ratio))) + h = int(round(math.sqrt(target_area / aspect_ratio))) + + if w <= img.size[0] and h <= img.size[1]: + i = random.randint(0, img.size[1] - h) + j = random.randint(0, img.size[0] - w) + return i, j, h, w + + # Fallback to central crop + in_ratio = img.size[0] / img.size[1] + if in_ratio < min(ratio): + w = img.size[0] + h = int(round(w / min(ratio))) + elif in_ratio > max(ratio): + h = img.size[1] + w = int(round(h * max(ratio))) + else: # whole image + w = img.size[0] + h = img.size[1] + i = (img.size[1] - h) // 2 + j = (img.size[0] - w) // 2 + return i, j, h, w + + def __call__(self, img, augmentation=False, box=None): + """ + Args: + img (PIL Image): Image to be cropped and resized. + Returns: + PIL Image: Randomly cropped and resized image. 
+ """ + if augmentation: + i, j, h, w = self.get_params(img, self.scale, self.ratio) + img = F.crop(img, i, j, h, w) + # img, box = crop(img, i, j, h, w, box) + img = F.resize(img, self.size, self.interpolation) + second_img = F.resize(img, self.second_size, self.second_interpolation) \ + if self.second_size is not None else None + return img, second_img + + def __repr__(self): + if isinstance(self.interpolation, (tuple, list)): + interpolate_str = ' '.join([_pil_interpolation_to_str[x] for x in self.interpolation]) + else: + interpolate_str = _pil_interpolation_to_str[self.interpolation] + format_string = self.__class__.__name__ + '(size={0}'.format(self.size) + format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale)) + format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio)) + format_string += ', interpolation={0}'.format(interpolate_str) + if self.second_size is not None: + format_string += ', second_size={0}'.format(self.second_size) + format_string += ', second_interpolation={0}'.format(_pil_interpolation_to_str[self.second_interpolation]) + format_string += ')' + return format_string + + +def pil_loader(path: str) -> Image.Image: + # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) + with open(path, 'rb') as f: + img = Image.open(f) + return img.convert('RGB') + diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/main.py similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/main.py diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_data.sh b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_data.sh similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_data.sh rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_data.sh diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_benchmark.sh diff 
--git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/main.py diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_data.sh b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_data.sh similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_data.sh rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_data.sh diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/mobilebert/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/mobilebert/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/main.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/main.py rename to 
examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/main.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/BiDAF/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/main.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/main.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_benchmark.sh 
b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_onnx_squad.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_onnx_squad.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_onnx_squad.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_onnx_squad.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/squad_evaluate.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/squad_evaluate.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/squad_evaluate.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/squad_evaluate.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/tokenization.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/tokenization.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/tokenization.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/bert-squad/quantization/ptq_dynamic/tokenization.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/gpt2.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/prepare_model.py similarity index 99% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/prepare_model.py index 6c68bba6ffc..0506d66fc79 100644 --- a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/prepare_model.py +++ b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/prepare_model.py @@ -61,7 +61,7 @@ def save_data(test_data_dir, prefix, names, data_list): def save_model(name, model, inputs, 
outputs, input_names=None, output_names=None, **kwargs): if hasattr(model, 'train'): model.train(False) - dir = '/' + dir = './' if not os.path.exists(dir): os.makedirs(dir) dir = os.path.join(dir, name) diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/gpt2/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/main.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/main.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/requirements.txt b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_benchmark.sh diff --git
a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_onnx_squad.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_onnx_squad.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_onnx_squad.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_onnx_squad.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/squad_evaluate.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/squad_evaluate.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/squad_evaluate.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/squad_evaluate.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/tokenization.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/tokenization.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/tokenization.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_dynamic/tokenization.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/main.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_benchmark.sh 
b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_onnx_squad.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_onnx_squad.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_onnx_squad.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_onnx_squad.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/squad_evaluate.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/squad_evaluate.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/squad_evaluate.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/squad_evaluate.py diff --git a/examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/tokenization.py b/examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/tokenization.py similarity index 100% rename from examples/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/tokenization.py rename to examples/deprecated/onnxrt/nlp/onnx_model_zoo/mobilebert/quantization/ptq_static/tokenization.py diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/README.md b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/README.md similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/README.md rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/README.md diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/main.py b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/main.py similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/main.py rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/main.py diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_data.sh b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_data.sh similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_data.sh rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_data.sh diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_model.py b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_model.py rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/prepare_model.py diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/requirements.txt 
b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/requirements.txt rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/requirements.txt diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_benchmark.sh diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_quant.sh b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_quant.sh rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_dynamic/run_quant.sh diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/README.md diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/main.py diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_data.sh b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/prepare_data.sh similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_data.sh rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/prepare_data.sh diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/nlp/roberta/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/nlp/roberta/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/nlp/roberta/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/README.md 
b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/README.md diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/cityscapes_labels.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/cityscapes_labels.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/cityscapes_labels.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/cityscapes_labels.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/main.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/DUC/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/README.md diff --git 
a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py similarity index 95% rename from examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py index 1e88f8abad8..e84acdd99c9 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_label_map.py @@ -1,103 +1,103 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# - -"""The dict mapping category IDs to its names of labels.""" - -category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' } \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py similarity index 97% rename from examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py index 2c57fd61302..1eebfcb5ff5 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/coco_tools.py @@ -1,672 +1,672 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Wrappers for third party pycocotools to be used within object_detection. -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. 
-TODO(jonathanhuang): wrap as a slim metric in metrics.py -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() -""" - -import copy -import time - -import numpy as np - -from collections import OrderedDict -from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class. - - Attributes: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - """ - - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): - """Construct a COCOWrapper. - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations: list) -> coco.COCO: - """Load annotations dictionary into COCO datastructure. - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. - Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. - Returns: - a coco.COCO datastructure holding object detection annotations results - Raises: - ValueError: if (1) annotations is not a list or annotations do not - correspond to the images contained in self. 
- """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - logger.info("Load and prepare annotation results.") - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): - """Construct a COCOEvalWrapper. - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. - iou_thrs: Minimal value for intersection over union that allows to - make decision that prediction bounding box is true positive. - You can specify one float value between 0 to 1 or - string "05:0.05:0.95" for standard COCO thresholds. - iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. - """ - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) - elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] - - if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) - if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) - if map_points == 0: - self.params.recThrs = [-1] - - - def GetCategory(self, category_id: int) -> dict: - """Fetch dictionary holding category information given category id. 
- Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self) -> bool: - """Return whether COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self) -> List[int]: - """Return the list of IDs of all valid categories.""" - return self.params.catIds - - def accumulate(self, p: cocoeval.Params = None): - """Accumulate evaluation results per image and store it to self.eval. - - Args: - p: input params for evaluation - """ - print('Accumulating evaluation results...') - tic = time.time() - if not self.evalImgs: - print('Please run evaluate() first') - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) - - # create dictionary for future indexing - _pe = self._paramsEval - print('-pe', _pe) - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 - for a, a0 in enumerate(a_list): - Na = a0*I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
- inds = np.argsort(-dtScores, kind='mergesort') - dtScoresSorted = dtScores[inds] - - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) - - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) - - # calculate precision - if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) - else: - q = np.zeros((R,)) - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() - - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - - # calculate recall - if nd: - recall[t,k,a,m] = rc[-1] - else: - recall[t,k,a,m] = 0 - - # calculate score - ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] - except: - pass - scores[t,:,k,a,m] = np.array(ss) - # exit(0) - self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, - } - toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - - - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover - """Compute detection metrics. - Args: - include_metrics_per_category: Whether include metrics per category. - all_metrics_per_category: Whether include all the summery metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. 
- Returns: - A tuple of (summary_metrics, per_category_ap), in which - (1) summary_metrics is a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments; - 'Precision/mAP@.50IOU': mean average precision at 50% IOU; - 'Precision/mAP@.75IOU': mean average precision at 75% IOU; - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels); - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels); - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels); - 'Recall/AR@1': average recall with 1 detection; - 'Recall/AR@10': average recall with 10 detections; - 'Recall/AR@100': average recall with 100 detections; - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections; - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections; - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; - and (2) per_category_ap is a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. If evaluating class-agnostic - mode, per_category_ap is an empty dictionary. - Raises: - ValueError: If category_stats does not exist. - """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - 
per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - Args: - box: a numpy array in format of [ymin, xmin, ymax, xmax] - Returns: - A list of floats, in COCO format, representing [xmin, ymin, width, height] - """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - Returns: - A pycocotools Run-length encoding of the mask. - """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: - """Export groundtruth of a single image to COCO format. - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - Returns: - A list of groundtruth annotations for a single image in the COCO format. 
- Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detections of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to the - ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in - correspondence - that is: boxes[i, :], and classes[i] - are associated with the same groundtruth annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scored for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - Returns: - A list of detection annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. 
- """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) - return detections_list - - -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detection masks of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - Returns: - A list of detection mask annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. +TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. 
+ """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. 
+ iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml similarity index 92% rename from examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml index a15a9a6b0d5..2129626e341 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/label_map.yaml @@ -1,80 +1,80 @@ -person: 1 -bicycle: 2 -car: 3 -motorcycle: 4 -airplane: 5 -bus: 6 -train: 7 -truck: 8 -boat: 9 -traffic light: 10 -fire hydrant: 11 -stop sign: 12 -parking meter: 13 -bench: 14 -bird: 15 -cat: 16 -dog: 17 -horse: 18 -sheep: 19 -cow: 20 -elephant: 21 -bear: 22 -zebra: 23 -giraffe: 24 -backpack: 25 -umbrella: 26 -handbag: 27 -tie: 28 -suitcase: 29 -frisbee: 30 -skis: 31 -snowboard: 32 -sports ball: 33 -kite: 34 -baseball bat: 35 -baseball glove: 36 -skateboard: 37 -surfboard: 38 -tennis racket: 39 -bottle: 40 -wine glass: 41 -cup: 42 -fork: 43 -knife: 44 -spoon: 45 -bowl: 46 -banana: 47 -apple: 48 -sandwich: 49 -orange: 50 -broccoli: 51 -carrot: 52 -hot dog: 53 -pizza: 54 -donut: 55 -cake: 56 -chair: 57 -couch: 58 -potted plant: 59 -bed: 60 -dining table: 61 -toilet: 62 -tv: 63 -laptop: 64 -mouse: 65 -remote: 66 -keyboard: 67 -cell phone: 68 -microwave: 69 -oven: 70 -toaster: 71 -sink: 72 -refrigerator: 73 -book: 74 -clock: 75 -vase: 76 -scissors: 77 -teddy bear: 78 -hair drier: 79 +person: 1 +bicycle: 2 +car: 3 +motorcycle: 4 +airplane: 5 +bus: 6 +train: 7 +truck: 8 +boat: 9 +traffic light: 10 +fire hydrant: 11 +stop sign: 12 +parking meter: 13 +bench: 14 +bird: 15 +cat: 16 +dog: 17 +horse: 18 +sheep: 19 +cow: 20 +elephant: 21 +bear: 22 +zebra: 23 +giraffe: 24 +backpack: 25 +umbrella: 26 +handbag: 27 +tie: 28 +suitcase: 29 +frisbee: 30 +skis: 31 +snowboard: 32 +sports ball: 33 +kite: 34 +baseball bat: 35 +baseball glove: 36 +skateboard: 37 +surfboard: 38 +tennis racket: 39 +bottle: 40 +wine glass: 41 +cup: 42 +fork: 43 +knife: 44 +spoon: 45 +bowl: 46 +banana: 47 +apple: 48 +sandwich: 49 +orange: 50 +broccoli: 51 +carrot: 52 +hot dog: 53 +pizza: 54 +donut: 55 +cake: 56 +chair: 57 +couch: 58 +potted plant: 59 +bed: 60 +dining table: 61 +toilet: 62 +tv: 63 +laptop: 64 +mouse: 65 +remote: 66 +keyboard: 67 +cell phone: 68 +microwave: 69 +oven: 70 +toaster: 71 +sink: 72 +refrigerator: 73 +book: 74 +clock: 75 +vase: 76 +scissors: 77 +teddy bear: 78 +hair drier: 79 toothbrush: 80 \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/main.py similarity index 100% rename from 
examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/main.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/faster_rcnn/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/README.md diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py similarity index 95% rename from examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py index 1e88f8abad8..e84acdd99c9 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_label_map.py @@ -1,103 +1,103 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# - -"""The dict mapping category IDs to its names of labels.""" - -category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' } \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py similarity index 97% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py index 2c57fd61302..1eebfcb5ff5 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py @@ -1,672 +1,672 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Wrappers for third party pycocotools to be used within object_detection. -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. 
-TODO(jonathanhuang): wrap as a slim metric in metrics.py -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() -""" - -import copy -import time - -import numpy as np - -from collections import OrderedDict -from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class. - - Attributes: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - """ - - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): - """Construct a COCOWrapper. - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations: list) -> coco.COCO: - """Load annotations dictionary into COCO datastructure. - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. - Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. - Returns: - a coco.COCO datastructure holding object detection annotations results - Raises: - ValueError: if (1) annotations is not a list or annotations do not - correspond to the images contained in self. 
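As a concrete illustration of the annotation format `LoadAnnotations` expects (values are made up; in bbox mode each entry carries the required 'image_id', 'category_id', 'score' keys plus a COCO-style 'bbox', and the image id must already exist in the wrapped groundtruth dataset):

```python
# Hypothetical detections for an image with id 1 that is present in the
# wrapped groundtruth; 'bbox' is COCO-style [xmin, ymin, width, height].
detections_list = [
    {'image_id': 1, 'category_id': 3, 'score': 0.87,
     'bbox': [12.0, 30.5, 40.0, 25.0]},
    {'image_id': 1, 'category_id': 1, 'score': 0.55,
     'bbox': [100.0, 80.0, 32.0, 64.0]},
]
# groundtruth = coco_tools.COCOWrapper(groundtruth_dict)   # see usage example above
# detections = groundtruth.LoadAnnotations(detections_list)
```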
- """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - logger.info("Load and prepare annotation results.") - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): - """Construct a COCOEvalWrapper. - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. - iou_thrs: Minimal value for intersection over union that allows to - make decision that prediction bounding box is true positive. - You can specify one float value between 0 to 1 or - string "05:0.05:0.95" for standard COCO thresholds. - iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. - """ - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) - elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] - - if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) - if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) - if map_points == 0: - self.params.recThrs = [-1] - - - def GetCategory(self, category_id: int) -> dict: - """Fetch dictionary holding category information given category id. 
- Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self) -> bool: - """Return whether COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self) -> List[int]: - """Return the list of IDs of all valid categories.""" - return self.params.catIds - - def accumulate(self, p: cocoeval.Params = None): - """Accumulate evaluation results per image and store it to self.eval. - - Args: - p: input params for evaluation - """ - print('Accumulating evaluation results...') - tic = time.time() - if not self.evalImgs: - print('Please run evaluate() first') - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) - - # create dictionary for future indexing - _pe = self._paramsEval - print('-pe', _pe) - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 - for a, a0 in enumerate(a_list): - Na = a0*I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
- inds = np.argsort(-dtScores, kind='mergesort') - dtScoresSorted = dtScores[inds] - - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) - - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) - - # calculate precision - if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) - else: - q = np.zeros((R,)) - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() - - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - - # calculate recall - if nd: - recall[t,k,a,m] = rc[-1] - else: - recall[t,k,a,m] = 0 - - # calculate score - ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] - except: - pass - scores[t,:,k,a,m] = np.array(ss) - # exit(0) - self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, - } - toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - - - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover - """Compute detection metrics. - Args: - include_metrics_per_category: Whether include metrics per category. - all_metrics_per_category: Whether include all the summery metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. 
- Returns: - A tuple of (summary_metrics, per_category_ap), in which - (1) summary_metrics is a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments; - 'Precision/mAP@.50IOU': mean average precision at 50% IOU; - 'Precision/mAP@.75IOU': mean average precision at 75% IOU; - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels); - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels); - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels); - 'Recall/AR@1': average recall with 1 detection; - 'Recall/AR@10': average recall with 10 detections; - 'Recall/AR@100': average recall with 100 detections; - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections; - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections; - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; - and (2) per_category_ap is a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. If evaluating class-agnostic - mode, per_category_ap is an empty dictionary. - Raises: - ValueError: If category_stats does not exist. - """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - 
per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - Args: - box: a numpy array in format of [ymin, xmin, ymax, xmax] - Returns: - A list of floats, in COCO format, representing [xmin, ymin, width, height] - """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - Returns: - A pycocotools Run-length encoding of the mask. - """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: - """Export groundtruth of a single image to COCO format. - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - Returns: - A list of groundtruth annotations for a single image in the COCO format. 
- Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detections of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to the - ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in - correspondence - that is: boxes[i, :], and classes[i] - are associated with the same groundtruth annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scored for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - Returns: - A list of detection annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. 
- """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) - return detections_list - - -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detection masks of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - Returns: - A list of detection mask annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. +TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. 
+ """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. 
+ iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
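The constructor above maps `map_points` onto `params.recThrs` (101- or 11-point interpolation, or `[-1]` for area under the PR curve, which is the `R == 1` path in `accumulate`). A short sketch of two configurations, reusing the hypothetical groundtruth/detections wrappers from the earlier sketch; note that the code compares `iou_thrs` against the string '0.5:0.05:0.95', not the '05:0.05:0.95' spelling in the docstring.

```python
# Standard COCO setting: IoU thresholds 0.5:0.05:0.95, 101-point interpolated AP.
evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections,
                                       iou_type='bbox',
                                       iou_thrs='0.5:0.05:0.95',
                                       map_points=101)

# Single IoU threshold with area under the PR curve
# (map_points=0 sets recThrs == [-1], i.e. the R == 1 branch in accumulate()).
evaluator_auc = coco_tools.COCOEvalWrapper(groundtruth, detections,
                                           iou_type='bbox',
                                           iou_thrs=0.5,
                                           map_points=0)
```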
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
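Since `_ConvertBoxToCOCOFormat` and `_RleCompress` above define the box and mask conventions used by every export helper in this file, a couple of worked values may help (toy inputs, assumed to run inside the same module so the private helpers are in scope):

```python
import numpy as np
from pycocotools import mask

# Internal [ymin, xmin, ymax, xmax] -> COCO [xmin, ymin, width, height]
_ConvertBoxToCOCOFormat(np.array([10., 20., 50., 80.]))
# -> [20.0, 10.0, 60.0, 40.0]   (width = xmax - xmin, height = ymax - ymin)

# Masks are stored as compressed RLE; pycocotools can recover area and bbox,
# which is exactly what LoadAnnotations does in segmentation mode.
binary_mask = np.zeros((4, 4), dtype=np.uint8)
binary_mask[1:3, 1:4] = 1
rle = _RleCompress(binary_mask)     # == mask.encode(np.asfortranarray(binary_mask))
mask.area(rle)                      # -> 6
mask.toBbox(rle)                    # -> [1., 1., 3., 2.]  (x, y, w, h)
```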
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
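For reference, a minimal usage sketch of the groundtruth export helper above (not part of the patch; the array values and ids are toy examples, and it assumes the functions of this coco_tools module are in scope):

    import numpy as np

    # Internal box convention is [ymin, xmin, ymax, xmax] in pixel coordinates.
    gt_boxes = np.array([[10., 20., 50., 80.]], dtype=np.float32)
    gt_classes = np.array([1], dtype=np.int32)  # e.g. COCO id 1 == 'person'

    gt_annotations = ExportSingleImageGroundtruthToCoco(
        image_id=1,
        next_annotation_id=1,
        category_id_set={1, 2, 3},
        groundtruth_boxes=gt_boxes,
        groundtruth_classes=gt_classes)

    # _ConvertBoxToCOCOFormat maps [10, 20, 50, 80] to [20.0, 10.0, 60.0, 40.0],
    # i.e. [xmin, ymin, width, height], which is what lands in
    # gt_annotations[0]['bbox']; 'area' is the box area (40 * 60 = 2400.0).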
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/label_map.yaml b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/label_map.yaml similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/label_map.yaml rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/label_map.yaml diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/main.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/README.md similarity index 100% rename from 
examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/README.md diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py similarity index 95% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py index 1e88f8abad8..e84acdd99c9 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_label_map.py @@ -1,103 +1,103 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# - -"""The dict mapping category IDs to its names of labels.""" - -category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' } \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py similarity index 97% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py index 2c57fd61302..1eebfcb5ff5 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/coco_tools.py @@ -1,672 +1,672 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Wrappers for third party pycocotools to be used within object_detection. -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. 
-TODO(jonathanhuang): wrap as a slim metric in metrics.py -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() -""" - -import copy -import time - -import numpy as np - -from collections import OrderedDict -from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class. - - Attributes: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - """ - - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): - """Construct a COCOWrapper. - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations: list) -> coco.COCO: - """Load annotations dictionary into COCO datastructure. - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. - Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. - Returns: - a coco.COCO datastructure holding object detection annotations results - Raises: - ValueError: if (1) annotations is not a list or annotations do not - correspond to the images contained in self. 
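A minimal end-to-end sketch of the wrappers described above (toy dictionary values, not part of the patch; it assumes the classes defined in this coco_tools module are in scope and pycocotools is installed):

    groundtruth_dict = {
        'images': [{'id': 1, 'height': 480, 'width': 640}],
        'annotations': [{'id': 1, 'image_id': 1, 'category_id': 1,
                         'bbox': [20.0, 10.0, 60.0, 40.0],  # [x, y, w, h]
                         'area': 2400.0, 'iscrowd': 0}],
        'categories': [{'id': 1, 'name': 'person'}],
    }
    detections_list = [{'image_id': 1, 'category_id': 1,
                        'bbox': [22.0, 12.0, 56.0, 36.0], 'score': 0.9}]

    groundtruth = COCOWrapper(groundtruth_dict)   # loads from a dict, no JSON file needed
    detections = groundtruth.LoadAnnotations(detections_list)
    evaluator = COCOEvalWrapper(groundtruth, detections, agnostic_mode=False)
    summary_metrics, _ = evaluator.ComputeMetrics()
    print(summary_metrics['Precision/mAP'])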
- """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - logger.info("Load and prepare annotation results.") - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): - """Construct a COCOEvalWrapper. - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. - iou_thrs: Minimal value for intersection over union that allows to - make decision that prediction bounding box is true positive. - You can specify one float value between 0 to 1 or - string "05:0.05:0.95" for standard COCO thresholds. - iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. - """ - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) - elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] - - if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) - if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) - if map_points == 0: - self.params.recThrs = [-1] - - - def GetCategory(self, category_id: int) -> dict: - """Fetch dictionary holding category information given category id. 
- Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self) -> bool: - """Return whether COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self) -> List[int]: - """Return the list of IDs of all valid categories.""" - return self.params.catIds - - def accumulate(self, p: cocoeval.Params = None): - """Accumulate evaluation results per image and store it to self.eval. - - Args: - p: input params for evaluation - """ - print('Accumulating evaluation results...') - tic = time.time() - if not self.evalImgs: - print('Please run evaluate() first') - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) - - # create dictionary for future indexing - _pe = self._paramsEval - print('-pe', _pe) - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 - for a, a0 in enumerate(a_list): - Na = a0*I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
- inds = np.argsort(-dtScores, kind='mergesort') - dtScoresSorted = dtScores[inds] - - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) - - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) - - # calculate precision - if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) - else: - q = np.zeros((R,)) - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() - - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - - # calculate recall - if nd: - recall[t,k,a,m] = rc[-1] - else: - recall[t,k,a,m] = 0 - - # calculate score - ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] - except: - pass - scores[t,:,k,a,m] = np.array(ss) - # exit(0) - self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, - } - toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - - - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover - """Compute detection metrics. - Args: - include_metrics_per_category: Whether include metrics per category. - all_metrics_per_category: Whether include all the summery metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. 
- Returns: - A tuple of (summary_metrics, per_category_ap), in which - (1) summary_metrics is a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments; - 'Precision/mAP@.50IOU': mean average precision at 50% IOU; - 'Precision/mAP@.75IOU': mean average precision at 75% IOU; - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels); - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels); - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels); - 'Recall/AR@1': average recall with 1 detection; - 'Recall/AR@10': average recall with 10 detections; - 'Recall/AR@100': average recall with 100 detections; - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections; - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections; - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; - and (2) per_category_ap is a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. If evaluating class-agnostic - mode, per_category_ap is an empty dictionary. - Raises: - ValueError: If category_stats does not exist. - """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - 
per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - Args: - box: a numpy array in format of [ymin, xmin, ymax, xmax] - Returns: - A list of floats, in COCO format, representing [xmin, ymin, width, height] - """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - Returns: - A pycocotools Run-length encoding of the mask. - """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: - """Export groundtruth of a single image to COCO format. - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - Returns: - A list of groundtruth annotations for a single image in the COCO format. 
- Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detections of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to the - ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in - correspondence - that is: boxes[i, :], and classes[i] - are associated with the same groundtruth annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scored for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - Returns: - A list of detection annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. 
- """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) - return detections_list - - -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detection masks of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - Returns: - A list of detection mask annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. +TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. 
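A minimal sketch of the 'segmentation' detection type mentioned above (toy mask and ids, not part of the patch; mask_util is pycocotools.mask, and the RLE step mirrors the _RleCompress helper defined later in this file):

    import numpy as np
    from pycocotools import mask as mask_util

    binary_mask = np.zeros((480, 640), dtype=np.uint8)
    binary_mask[10:50, 20:80] = 1                        # a 40 x 60 block of ones
    rle = mask_util.encode(np.asfortranarray(binary_mask))

    gt_dict = {
        'images': [{'id': 1, 'height': 480, 'width': 640}],
        'annotations': [{'id': 1, 'image_id': 1, 'category_id': 1,
                         'segmentation': rle, 'iscrowd': 0,
                         'area': 2400.0, 'bbox': [20.0, 10.0, 60.0, 40.0]}],
        'categories': [{'id': 1, 'name': 'person'}],
    }
    mask_detections = [{'image_id': 1, 'category_id': 1,
                        'segmentation': rle, 'score': 0.8}]

    wrapper = COCOWrapper(gt_dict, detection_type='segmentation')
    # With detection_type='segmentation', LoadAnnotations derives each detection's
    # 'area' and 'bbox' from the RLE (via mask.area / mask.toBbox) instead of
    # requiring a box.
    wrapped_detections = wrapper.LoadAnnotations(mask_detections)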
+ """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. 
+ iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py similarity index 97% rename from examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py index f6123ea7102..afc53bce1ae 100644 --- a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py @@ -1,470 +1,471 @@ -import numpy as np -import collections -from PIL import Image -import os -import yaml -from pycocotools.coco import COCO -import cv2 - -class SequentialSampler(): - def __init__(self, dataset): - self.whole_dataset = dataset - - def __iter__(self): - self.process_rank = 0 # The default rank is 0, which represents the main process - self.process_size = 1 # By default, process_size=1, only the main process is running - return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) - - def __len__(self): - return len(self.whole_dataset) - -class BatchSampler(): - def __init__(self, sampler, batch_size, drop_last=True): - if isinstance(drop_last, bool): - self.drop_last = drop_last - else: - raise ValueError("last_batch only support bool as input") - - self.sampler = sampler - self.batch_size = batch_size - self.drop_last = drop_last - - def __iter__(self): - batch = [] - for idx in self.sampler: - batch.append(idx) - if len(batch) == self.batch_size: - yield batch - batch = [] - if len(batch) > 0 and not self.drop_last: - yield batch - - def __len__(self): - if self.drop_last: - return len(self.sampler) // self.batch_size - else: - return (len(self.sampler) + self.batch_size - 1) // self.batch_size - -class IndexFetcher(): - def __init__(self, dataset, collate_fn, drop_last): - self.dataset = dataset - self.collate_fn = collate_fn - self.drop_last = drop_last - - def __call__(self, batched_indices): - data = [self.dataset[idx] for idx in batched_indices] - return self.collate_fn(data) - - -def default_collate(batch): - """Merge data with outer dimension batch size.""" - elem = batch[0] - if isinstance(elem, collections.abc.Mapping): - return {key: default_collate([d[key] for d in batch]) for key in elem} - elif isinstance(elem, collections.abc.Sequence): - batch = zip(*batch) - return [default_collate(samples) for samples in batch] - elif isinstance(elem, np.ndarray): - try: - return np.stack(batch) - except: - return batch - else: - return batch - -class COCORawDataloader(): - def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, - shuffle=False): - self.dataset = dataset - self.last_batch = last_batch - self.sampler = sampler - self.batch_sampler = batch_sampler - self.num_workers = num_workers - self.pin_memory = pin_memory - self.collate_fn = 
collate_fn - self.batch_size = batch_size - self.shuffle = shuffle - self.drop_last = False if last_batch == 'rollover' else True - if self.collate_fn == None: - self.collate_fn = default_collate - - def __iter__(self): - """Yield data in iterative order.""" - return self._generate_dataloader( - self.dataset, - batch_size=self.batch_size, - last_batch=self.last_batch, - collate_fn=self.collate_fn, - sampler=self.sampler, - batch_sampler=self.batch_sampler, - num_workers=self.num_workers, - pin_memory=self.pin_memory, - shuffle=self.shuffle) - - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, - batch_sampler, num_workers, pin_memory, shuffle): - - sampler = self._generate_sampler(dataset) - self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) - self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) - - for batched_indices in self.batch_sampler: - try: - data = self.fetcher(batched_indices) - yield data - except StopIteration: - return - - def _generate_sampler(self, dataset): - if hasattr(dataset, "__getitem__"): - self.dataset_type = 'index' - return SequentialSampler(dataset) - else: - raise ValueError("dataset type only support (index, iter)") - - -class COCORawDataset(): - """Coco raw dataset. - Please arrange data in this way: - root - ├── 1.jpg - ├── 2.jpg - ... - └── n.jpg - anno_dir - Please use Resize transform when batch_size > 1 - Args: root (str): Root directory of dataset. - anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. - transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according - to specific conditions. - """ - - def __init__(self, root, \ - anno_dir='annotations/instances_val2017.json', transform=None, filter=None): - """Initialize the attributes of class.""" - self.batch_size = 1 - self.image_list = [] - self.transform = transform - img_path = root - anno_path = os.path.join(os.path.dirname(root), anno_dir) - coco = COCO(anno_path) - img_ids = coco.getImgIds() - cat_ids = coco.getCatIds() - for idx, img_id in enumerate(img_ids): - img_info = {} - bboxes = [] - labels = [] - ids = [] - img_detail = coco.loadImgs(img_id)[0] - ids.append(img_detail['file_name'].encode('utf-8')) - pic_height = img_detail['height'] - pic_width = img_detail['width'] - - ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) - anns = coco.loadAnns(ann_ids) - for ann in anns: - bbox = ann['bbox'] - if len(bbox) == 0: - continue - bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ - bbox[2]/float(pic_width), bbox[3]/float(pic_height)] - bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) - labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) - img_file = os.path.join(img_path, img_detail['file_name']) - if not os.path.exists(img_file) or len(bboxes) == 0: - continue - - if filter and not filter(None, bboxes): - continue - - with Image.open(img_file) as image: - image = np.array(image.convert('RGB')) - self.image_list.append( - (image, [np.array(bboxes), np.array(labels), np.array([]),\ - np.array(img_detail['file_name'].encode('utf-8'))])) - - def __len__(self): - """Length of the dataset.""" - return len(self.image_list) - - def __getitem__(self, index): - """Magic method. 
- x[i] is roughly equivalent to type(x).__getitem__(x, index) - """ - sample = self.image_list[index] - if self.transform is not None: - sample= self.transform(sample) - return sample - -interpolation_map = { - 'nearest': cv2.INTER_NEAREST, - 'bilinear': cv2.INTER_LINEAR, - 'bicubic': cv2.INTER_CUBIC, -} - -class ResizeTransform(): - def __init__(self, size, interpolation='bilinear'): - if isinstance(size, int): - self.size = size, size - elif isinstance(size, list): - if len(size) == 1: - self.size = size[0], size[0] - elif len(size) == 2: - self.size = size[0], size[1] - - if interpolation in interpolation_map.keys(): - self.interpolation = interpolation_map[interpolation] - else: - raise ValueError("Undefined interpolation type") - - def __call__(self, sample): - image, label = sample - image = cv2.resize(image, self.size, interpolation=self.interpolation) - if len(image.shape) == 2: - image = np.expand_dims(image, -1) - return (image, label) - -class RescaleTransform(): - """Scale the values of image to [0,1]. - Returns: - tuple of processed image and label - """ - - def __call__(self, sample): - """Scale the values of the image in sample.""" - image, label = sample - if isinstance(image, np.ndarray): - image = image.astype('float32') / 255. - return (image, label) - -class NormalizeTransform(): - def __init__(self, mean=[0.0], std=[1.0]): - self.mean = mean - self.std = std - for item in self.std: - if item < 10**-6: - raise ValueError("Std should be greater than 0") - - def __call__(self, sample): - image, label = sample - assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' - image = (image - self.mean) / self.std - return (image, label) - -class TransposeTransform(): - def __init__(self, perm): - self.perm = perm - - def __call__(self, sample): - image, label = sample - assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" - image = np.transpose(image, axes=self.perm) - return (image, label) - -np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, - 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, - 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, - 'float16': np.float16, 'float64': np.float64, 'bool': bool, - 'string': str, 'complex128': np.complex128, 'int16': np.int16} - -class CastTransform(): - def __init__(self, dtype='float32'): - assert dtype in np_dtype_map.keys(), 'Unknown dtype' - self.dtype = dtype - - def __call__(self, sample): - image, label = sample - image = image.astype(np_dtype_map[self.dtype]) - return (image, label) - -class ComposeTransform(): - def __init__(self, transform_list): - self.transform_list = transform_list - - def __call__(self, sample): - for transform in self.transform_list: - sample = transform(sample) - return sample - -class COCOmAPv2(): - """Compute mean average precision of the detection task.""" - - def __init__(self, - anno_path=None, - iou_thrs='0.5:0.05:0.95', - map_points=101, - map_key='DetectionBoxes_Precision/mAP', - output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): - """Initialize the metric. - Args: - anno_path: The path of annotation file. - iou_thrs: Minimal value for intersection over union that allows to make decision - that prediction bounding box is true positive. You can specify one float value - between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. - Defaults to 'DetectionBoxes_Precision/mAP'. - output_index_mapping: The output index mapping. - Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. - """ - self.output_index_mapping = output_index_mapping - from coco_label_map import category_map - if anno_path: - assert os.path.exists(anno_path), 'Annotation path does not exists!' - with open(anno_path, 'r') as f: - label_map = yaml.safe_load(f.read()) - self.category_map_reverse = {k: v for k,v in label_map.items()} - else: - # label: index - self.category_map_reverse = {v: k for k, v in category_map.items()} - self.image_ids = [] - self.ground_truth_list = [] - self.detection_list = [] - self.annotation_id = 1 - self.category_map = category_map - self.category_id_set = set( - [cat for cat in self.category_map]) #index - self.iou_thrs = iou_thrs - self.map_points = map_points - self.map_key = map_key - - def update(self, predicts, labels, sample_weight=None): - """Add the predictions and labels. - Args: - predicts: The predictions. - labels: The labels corresponding to the predictions. - sample_weight: The sample weight. Defaults to None. - """ - from coco_tools import ExportSingleImageGroundtruthToCoco,\ - ExportSingleImageDetectionBoxesToCoco - detections = [] - if 'num_detections' in self.output_index_mapping and \ - self.output_index_mapping['num_detections'] > -1: - for item in zip(*predicts): - detection = {} - num = int(item[self.output_index_mapping['num_detections']]) - detection['boxes'] = np.asarray( - item[self.output_index_mapping['boxes']])[0:num] - detection['scores'] = np.asarray( - item[self.output_index_mapping['scores']])[0:num] - detection['classes'] = np.asarray( - item[self.output_index_mapping['classes']])[0:num] - detections.append(detection) - else: - for item in zip(*predicts): - detection = {} - detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) - detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) - detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) - detections.append(detection) - - bboxes, str_labels,int_labels, image_ids = labels - labels = [] - if len(int_labels[0]) == 0: - for str_label in str_labels: - str_label = [ - x if type(x) == 'str' else x.decode('utf-8') - for x in str_label - ] - labels.append([self.category_map_reverse[x] for x in str_label]) - elif len(str_labels[0]) == 0: - for int_label in int_labels: - labels.append([x for x in int_label]) - - for idx, image_id in enumerate(image_ids): - image_id = image_id if type( - image_id) == 'str' else image_id.decode('utf-8') - if image_id in self.image_ids: - continue - self.image_ids.append(image_id) - - ground_truth = {} - ground_truth['boxes'] = np.asarray(bboxes[idx]) - ground_truth['classes'] = np.asarray(labels[idx]) - - self.ground_truth_list.extend( - ExportSingleImageGroundtruthToCoco( - image_id=image_id, - next_annotation_id=self.annotation_id, - category_id_set=self.category_id_set, - groundtruth_boxes=ground_truth['boxes'], - groundtruth_classes=ground_truth['classes'])) - self.annotation_id += ground_truth['boxes'].shape[0] - - self.detection_list.extend( - ExportSingleImageDetectionBoxesToCoco( - image_id=image_id, - category_id_set=self.category_id_set, - detection_boxes=detections[idx]['boxes'], - detection_scores=detections[idx]['scores'], - detection_classes=detections[idx]['classes'])) 
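# Editorial aside (not part of the patch): a minimal sketch of how the two export
# helpers documented above are typically fed per image, which is exactly what the
# update() method ending here does. Assumes `coco_tools` from this example directory
# is importable; the image id, category ids, boxes and scores below are made up.
import numpy as np
from coco_tools import (ExportSingleImageGroundtruthToCoco,
                        ExportSingleImageDetectionBoxesToCoco)

category_id_set = {1, 18}  # e.g. 'person', 'dog' from coco_label_map.category_map
gt_boxes = np.array([[0.10, 0.20, 0.50, 0.60]], dtype=np.float32)  # [ymin, xmin, ymax, xmax]
gt_classes = np.array([18], dtype=np.int32)
det_boxes = np.array([[0.12, 0.21, 0.48, 0.61]], dtype=np.float32)
det_scores = np.array([0.87], dtype=np.float32)
det_classes = np.array([18], dtype=np.int32)

# Ground truth for one image becomes a list of COCO-style annotation dicts.
groundtruth_list = ExportSingleImageGroundtruthToCoco(
    image_id='000000000001.jpg',
    next_annotation_id=1,
    category_id_set=category_id_set,
    groundtruth_boxes=gt_boxes,
    groundtruth_classes=gt_classes)

# Detections for the same image become COCO-style result dicts with scores.
detections_list = ExportSingleImageDetectionBoxesToCoco(
    image_id='000000000001.jpg',
    category_id_set=category_id_set,
    detection_boxes=det_boxes,
    detection_scores=det_scores,
    detection_classes=det_classes)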
- - def reset(self): - """Reset the prediction and labels.""" - self.image_ids = [] - self.ground_truth_list = [] - self.detection_list = [] - self.annotation_id = 1 - - def result(self): - """Compute mean average precision. - Returns: - The mean average precision score. - """ - from coco_tools import COCOWrapper, COCOEvalWrapper - if len(self.ground_truth_list) == 0: - return 0 - else: - groundtruth_dict = { - 'annotations': - self.ground_truth_list, - 'images': [{ - 'id': image_id - } for image_id in self.image_ids], - 'categories': [{ - 'id': k, - 'name': v - } for k, v in self.category_map.items()] - } - coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) - coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( - self.detection_list) - box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, - coco_wrapped_detections, - agnostic_mode=False, - iou_thrs = self.iou_thrs, - map_points = self.map_points) - box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( - include_metrics_per_category=False, all_metrics_per_category=False) - box_metrics.update(box_per_category_ap) - box_metrics = { - 'DetectionBoxes_' + key: value - for key, value in iter(box_metrics.items()) - } - - return box_metrics[self.map_key] - -class Post: - def __call__(self, sample): - preds, labels = sample - preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] - return preds, labels - -class LabelBalanceCOCORawFilter(object): - """The label balance filter for COCO raw data.""" - - def __init__(self, size=1): - """Initialize the attribute of class.""" - self.size = size - - def __call__(self, image, label): - """Execute the filter. - - Args: - image: Not used. - label: label of a sample. - """ +import numpy as np +import collections +from PIL import Image +import os +import yaml +from pycocotools.coco import COCO +import cv2 + +class SequentialSampler(): + def __init__(self, dataset): + self.whole_dataset = dataset + + def __iter__(self): + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) + + def __len__(self): + return len(self.whole_dataset) + +class BatchSampler(): + def __init__(self, sampler, batch_size, drop_last=True): + if isinstance(drop_last, bool): + self.drop_last = drop_last + else: + raise ValueError("last_batch only support bool as input") + + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self): + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size + +class IndexFetcher(): + def __init__(self, dataset, collate_fn, drop_last): + self.dataset = dataset + self.collate_fn = collate_fn + self.drop_last = drop_last + + def __call__(self, batched_indices): + data = [self.dataset[idx] for idx in batched_indices] + return self.collate_fn(data) + + +def default_collate(batch): + """Merge data with outer dimension batch size.""" + elem = batch[0] + if isinstance(elem, collections.abc.Mapping): + return {key: default_collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, collections.abc.Sequence): + batch = zip(*batch) + 
return [default_collate(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + try: + return np.stack(batch) + except: + return batch + else: + return batch + +class COCORawDataloader(): + def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False): + self.dataset = dataset + self.last_batch = last_batch + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.collate_fn = collate_fn + self.batch_size = batch_size + self.shuffle = shuffle + self.drop_last = False if last_batch == 'rollover' else True + if self.collate_fn == None: + self.collate_fn = default_collate + + def __iter__(self): + """Yield data in iterative order.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + collate_fn=self.collate_fn, + sampler=self.sampler, + batch_sampler=self.batch_sampler, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=self.shuffle) + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle): + + sampler = self._generate_sampler(dataset) + self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) + self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) + + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + return + + def _generate_sampler(self, dataset): + if hasattr(dataset, "__getitem__"): + self.dataset_type = 'index' + return SequentialSampler(dataset) + else: + raise ValueError("dataset type only support (index, iter)") + + +class COCORawDataset(): + """Coco raw dataset. + Please arrange data in this way: + root + ├── 1.jpg + ├── 2.jpg + ... + └── n.jpg + anno_dir + Please use Resize transform when batch_size > 1 + Args: root (str): Root directory of dataset. + anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + + def __init__(self, root, \ + anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + """Initialize the attributes of class.""" + self.batch_size = 1 + self.image_list = [] + self.transform = transform + img_path = root + anno_path = os.path.join(os.path.dirname(root), anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ + bbox[2]/float(pic_width), bbox[3]/float(pic_height)] + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) + labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + + with Image.open(img_file) as image: + image = np.array(image.convert('RGB')) + self.image_list.append( + (image, [np.array(bboxes), np.array(labels), np.array([]),\ + np.array(img_detail['file_name'].encode('utf-8'))])) + + def __len__(self): + """Length of the dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Magic method. + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + if self.transform is not None: + sample= self.transform(sample) + return sample + +interpolation_map = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, +} + +class ResizeTransform(): + def __init__(self, size, interpolation='bilinear'): + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + image, label = sample + image = cv2.resize(image, self.size, interpolation=self.interpolation) + if len(image.shape) == 2: + image = np.expand_dims(image, -1) + return (image, label) + +class RescaleTransform(): + """Scale the values of image to [0,1]. + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. 
+ return (image, label) + +class NormalizeTransform(): + def __init__(self, mean=[0.0], std=[1.0]): + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + image, label = sample + assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + image = (image - self.mean) / self.std + return (image, label) + +class TransposeTransform(): + def __init__(self, perm): + self.perm = perm + + def __call__(self, sample): + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = np.transpose(image, axes=self.perm) + return (image, label) + +np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, + 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, + 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, + 'float16': np.float16, 'float64': np.float64, 'bool': bool, + 'string': str, 'complex128': np.complex128, 'int16': np.int16} + +class CastTransform(): + def __init__(self, dtype='float32'): + assert dtype in np_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + image, label = sample + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +class ComposeTransform(): + def __init__(self, transform_list): + self.transform_list = transform_list + + def __call__(self, sample): + for transform in self.transform_list: + sample = transform(sample) + return sample + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + + +class Post: + def __call__(self, sample): + preds, labels = sample + preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] + return preds, labels + +class LabelBalanceCOCORawFilter(object): + """The label balance filter for COCO raw data.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ return len(label) == self.size \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/label_map.yaml b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/label_map.yaml similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/label_map.yaml rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/label_map.yaml diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/main.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from 
examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/README.md diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py similarity index 95% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py index 1e88f8abad8..e84acdd99c9 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py @@ -1,103 +1,103 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# - -"""The dict mapping category IDs to its names of labels.""" - -category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' } \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py similarity index 97% rename from examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py index 2c57fd61302..1eebfcb5ff5 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/mask_rcnn/quantization/ptq_static/coco_tools.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py @@ -1,672 +1,672 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Wrappers for third party pycocotools to be used within object_detection. -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. 
-TODO(jonathanhuang): wrap as a slim metric in metrics.py -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() -""" - -import copy -import time - -import numpy as np - -from collections import OrderedDict -from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class. - - Attributes: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - """ - - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): - """Construct a COCOWrapper. - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations: list) -> coco.COCO: - """Load annotations dictionary into COCO datastructure. - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. - Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. - Returns: - a coco.COCO datastructure holding object detection annotations results - Raises: - ValueError: if (1) annotations is not a list or annotations do not - correspond to the images contained in self. 
- """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - logger.info("Load and prepare annotation results.") - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): - """Construct a COCOEvalWrapper. - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. - iou_thrs: Minimal value for intersection over union that allows to - make decision that prediction bounding box is true positive. - You can specify one float value between 0 to 1 or - string "05:0.05:0.95" for standard COCO thresholds. - iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. - """ - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) - elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] - - if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) - if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) - if map_points == 0: - self.params.recThrs = [-1] - - - def GetCategory(self, category_id: int) -> dict: - """Fetch dictionary holding category information given category id. 
- Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self) -> bool: - """Return whether COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self) -> List[int]: - """Return the list of IDs of all valid categories.""" - return self.params.catIds - - def accumulate(self, p: cocoeval.Params = None): - """Accumulate evaluation results per image and store it to self.eval. - - Args: - p: input params for evaluation - """ - print('Accumulating evaluation results...') - tic = time.time() - if not self.evalImgs: - print('Please run evaluate() first') - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) - - # create dictionary for future indexing - _pe = self._paramsEval - print('-pe', _pe) - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 - for a, a0 in enumerate(a_list): - Na = a0*I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
- inds = np.argsort(-dtScores, kind='mergesort') - dtScoresSorted = dtScores[inds] - - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) - - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) - - # calculate precision - if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) - else: - q = np.zeros((R,)) - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() - - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - - # calculate recall - if nd: - recall[t,k,a,m] = rc[-1] - else: - recall[t,k,a,m] = 0 - - # calculate score - ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] - except: - pass - scores[t,:,k,a,m] = np.array(ss) - # exit(0) - self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, - } - toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - - - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover - """Compute detection metrics. - Args: - include_metrics_per_category: Whether include metrics per category. - all_metrics_per_category: Whether include all the summery metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. 
- Returns: - A tuple of (summary_metrics, per_category_ap), in which - (1) summary_metrics is a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments; - 'Precision/mAP@.50IOU': mean average precision at 50% IOU; - 'Precision/mAP@.75IOU': mean average precision at 75% IOU; - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels); - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels); - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels); - 'Recall/AR@1': average recall with 1 detection; - 'Recall/AR@10': average recall with 10 detections; - 'Recall/AR@100': average recall with 100 detections; - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections; - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections; - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; - and (2) per_category_ap is a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. If evaluating class-agnostic - mode, per_category_ap is an empty dictionary. - Raises: - ValueError: If category_stats does not exist. - """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - 
per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - Args: - box: a numpy array in format of [ymin, xmin, ymax, xmax] - Returns: - A list of floats, in COCO format, representing [xmin, ymin, width, height] - """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - Returns: - A pycocotools Run-length encoding of the mask. - """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: - """Export groundtruth of a single image to COCO format. - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - Returns: - A list of groundtruth annotations for a single image in the COCO format. 
- Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detections of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to the - ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in - correspondence - that is: boxes[i, :], and classes[i] - are associated with the same groundtruth annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scored for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - Returns: - A list of detection annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. 
- """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) - return detections_list - - -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detection masks of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - Returns: - A list of detection mask annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. +TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. 
+ """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. 
+ iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
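+                        # The sorted scores below drive cumulative TP/FP counts per IoU threshold;
+                        # precision is then either integrated as the area under the PR curve (when
+                        # recThrs == [-1], i.e. map_points == 0) or interpolated at the configured
+                        # recall thresholds (101-point or 11-point mAP).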
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+    Raises:
+        ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the
+            right lengths or (2) if each of the elements inside these lists do not
+            have the correct shapes or (3) if image_ids are not integers.
+    """
+    if len(groundtruth_classes.shape) != 1:
+        raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.')
+    if len(groundtruth_boxes.shape) != 2:
+        raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.')
+    if groundtruth_boxes.shape[1] != 4:
+        raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.')
+    num_boxes = groundtruth_classes.shape[0]
+    if num_boxes != groundtruth_boxes.shape[0]:
+        raise ValueError(
+            'Corresponding entries in groundtruth_classes, '
+            'and groundtruth_boxes should have '
+            'compatible shapes (i.e., agree on the 0th dimension). '
+            'Classes shape: %d. Boxes shape: %d. Image ID: %s' %
+            (groundtruth_classes.shape[0], groundtruth_boxes.shape[0],
+             image_id))
+    has_is_crowd = groundtruth_is_crowd is not None
+    if has_is_crowd and len(groundtruth_is_crowd.shape) != 1:
+        raise ValueError('groundtruth_is_crowd is expected to be of rank 1.')
+    groundtruth_list = []
+    for i in range(num_boxes):
+        if groundtruth_classes[i] in category_id_set:
+            iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0
+            export_dict = {
+                'id':
+                    next_annotation_id + i,
+                'image_id':
+                    image_id,
+                'category_id':
+                    int(groundtruth_classes[i]),
+                'bbox':
+                    list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])),
+                'area':
+                    float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) *
+                          (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])),
+                'iscrowd':
+                    iscrowd
+            }
+            if groundtruth_masks is not None:
+                export_dict['segmentation'] = _RleCompress(
+                    groundtruth_masks[i])
+            groundtruth_list.append(export_dict)
+    return groundtruth_list
+
+
+def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str],
+                                          category_id_set: Set[int],
+                                          detection_boxes: np.array,
+                                          detection_scores: np.array,
+                                          detection_classes: np.array) -> list:
+    """Export detections of a single image to COCO format.
+    This function converts detections represented as numpy arrays to dictionaries
+    that can be ingested by the COCO evaluation API. Note that the image_ids
+    provided here must match the ones given to
+    ExportSingleImageGroundtruthToCoco. We assume that boxes and classes are in
+    correspondence - that is: boxes[i, :] and classes[i]
+    are associated with the same detection.
+    Args:
+        image_id: unique image identifier either of type integer or string.
+        category_id_set: A set of valid class ids. Detections with classes not in
+            category_id_set are dropped.
+        detection_boxes: float numpy array of shape [num_detections, 4] containing
+            detection boxes.
+        detection_scores: float numpy array of shape [num_detections] containing
+            scores for the detection boxes.
+        detection_classes: integer numpy array of shape [num_detections] containing
+            the classes for detection boxes.
+    Returns:
+        A list of detection annotations for a single image in the COCO format.
+    Raises:
+        ValueError: if (1) detection_boxes, detection_scores and detection_classes
+            do not have the right lengths or (2) if each of the elements inside these
+            lists do not have the correct shapes or (3) if image_ids are not integers.
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py similarity index 97% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py index f6123ea7102..413b045f29e 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py @@ -1,470 +1,470 @@ -import numpy as np -import collections -from PIL import Image -import os -import yaml -from pycocotools.coco import COCO -import cv2 - -class SequentialSampler(): - def __init__(self, dataset): - self.whole_dataset = dataset - - def __iter__(self): - self.process_rank = 0 # The default rank is 0, which represents the main process - self.process_size = 1 # By default, process_size=1, only the main process is running - return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) - - def __len__(self): - return len(self.whole_dataset) - -class BatchSampler(): - def __init__(self, sampler, batch_size, drop_last=True): - if isinstance(drop_last, bool): - self.drop_last = drop_last - else: - raise ValueError("last_batch only support bool as input") - - self.sampler = sampler - self.batch_size = batch_size - self.drop_last = drop_last - - def __iter__(self): - batch = [] - for idx in self.sampler: - batch.append(idx) - if len(batch) == self.batch_size: - yield batch - batch = [] - if len(batch) > 0 and not self.drop_last: - yield batch - - def __len__(self): - if self.drop_last: - return len(self.sampler) // self.batch_size - else: - return (len(self.sampler) + self.batch_size - 1) // self.batch_size - -class IndexFetcher(): - def __init__(self, dataset, collate_fn, drop_last): - self.dataset = dataset - self.collate_fn = collate_fn - self.drop_last = drop_last - - def __call__(self, batched_indices): - data = [self.dataset[idx] for idx in batched_indices] - return self.collate_fn(data) - - -def default_collate(batch): - """Merge data with outer dimension batch size.""" - elem = batch[0] - if isinstance(elem, collections.abc.Mapping): - return {key: default_collate([d[key] for d in batch]) for key in elem} - elif isinstance(elem, collections.abc.Sequence): - batch = zip(*batch) - return [default_collate(samples) for samples in batch] - elif isinstance(elem, np.ndarray): - try: - return np.stack(batch) - except: - return batch - else: - return batch - -class COCORawDataloader(): - def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, - shuffle=False): - self.dataset = dataset - self.last_batch = last_batch - self.sampler = sampler - self.batch_sampler = batch_sampler - 
self.num_workers = num_workers - self.pin_memory = pin_memory - self.collate_fn = collate_fn - self.batch_size = batch_size - self.shuffle = shuffle - self.drop_last = False if last_batch == 'rollover' else True - if self.collate_fn == None: - self.collate_fn = default_collate - - def __iter__(self): - """Yield data in iterative order.""" - return self._generate_dataloader( - self.dataset, - batch_size=self.batch_size, - last_batch=self.last_batch, - collate_fn=self.collate_fn, - sampler=self.sampler, - batch_sampler=self.batch_sampler, - num_workers=self.num_workers, - pin_memory=self.pin_memory, - shuffle=self.shuffle) - - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, - batch_sampler, num_workers, pin_memory, shuffle): - - sampler = self._generate_sampler(dataset) - self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) - self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) - - for batched_indices in self.batch_sampler: - try: - data = self.fetcher(batched_indices) - yield data - except StopIteration: - return - - def _generate_sampler(self, dataset): - if hasattr(dataset, "__getitem__"): - self.dataset_type = 'index' - return SequentialSampler(dataset) - else: - raise ValueError("dataset type only support (index, iter)") - - -class COCORawDataset(): - """Coco raw dataset. - Please arrange data in this way: - root - ├── 1.jpg - ├── 2.jpg - ... - └── n.jpg - anno_dir - Please use Resize transform when batch_size > 1 - Args: root (str): Root directory of dataset. - anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. - transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according - to specific conditions. - """ - - def __init__(self, root, \ - anno_dir='annotations/instances_val2017.json', transform=None, filter=None): - """Initialize the attributes of class.""" - self.batch_size = 1 - self.image_list = [] - self.transform = transform - img_path = root - anno_path = os.path.join(os.path.dirname(root), anno_dir) - coco = COCO(anno_path) - img_ids = coco.getImgIds() - cat_ids = coco.getCatIds() - for idx, img_id in enumerate(img_ids): - img_info = {} - bboxes = [] - labels = [] - ids = [] - img_detail = coco.loadImgs(img_id)[0] - ids.append(img_detail['file_name'].encode('utf-8')) - pic_height = img_detail['height'] - pic_width = img_detail['width'] - - ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) - anns = coco.loadAnns(ann_ids) - for ann in anns: - bbox = ann['bbox'] - if len(bbox) == 0: - continue - bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ - bbox[2]/float(pic_width), bbox[3]/float(pic_height)] - bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) - labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) - img_file = os.path.join(img_path, img_detail['file_name']) - if not os.path.exists(img_file) or len(bboxes) == 0: - continue - - if filter and not filter(None, bboxes): - continue - - with Image.open(img_file) as image: - image = np.array(image.convert('RGB')) - self.image_list.append( - (image, [np.array(bboxes), np.array(labels), np.array([]),\ - np.array(img_detail['file_name'].encode('utf-8'))])) - - def __len__(self): - """Length of the dataset.""" - return len(self.image_list) - - def __getitem__(self, index): - """Magic method. 
- x[i] is roughly equivalent to type(x).__getitem__(x, index) - """ - sample = self.image_list[index] - if self.transform is not None: - sample= self.transform(sample) - return sample - -interpolation_map = { - 'nearest': cv2.INTER_NEAREST, - 'bilinear': cv2.INTER_LINEAR, - 'bicubic': cv2.INTER_CUBIC, -} - -class ResizeTransform(): - def __init__(self, size, interpolation='bilinear'): - if isinstance(size, int): - self.size = size, size - elif isinstance(size, list): - if len(size) == 1: - self.size = size[0], size[0] - elif len(size) == 2: - self.size = size[0], size[1] - - if interpolation in interpolation_map.keys(): - self.interpolation = interpolation_map[interpolation] - else: - raise ValueError("Undefined interpolation type") - - def __call__(self, sample): - image, label = sample - image = cv2.resize(image, self.size, interpolation=self.interpolation) - if len(image.shape) == 2: - image = np.expand_dims(image, -1) - return (image, label) - -class RescaleTransform(): - """Scale the values of image to [0,1]. - Returns: - tuple of processed image and label - """ - - def __call__(self, sample): - """Scale the values of the image in sample.""" - image, label = sample - if isinstance(image, np.ndarray): - image = image.astype('float32') / 255. - return (image, label) - -class NormalizeTransform(): - def __init__(self, mean=[0.0], std=[1.0]): - self.mean = mean - self.std = std - for item in self.std: - if item < 10**-6: - raise ValueError("Std should be greater than 0") - - def __call__(self, sample): - image, label = sample - assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' - image = (image - self.mean) / self.std - return (image, label) - -class TransposeTransform(): - def __init__(self, perm): - self.perm = perm - - def __call__(self, sample): - image, label = sample - assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" - image = np.transpose(image, axes=self.perm) - return (image, label) - -np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, - 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, - 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, - 'float16': np.float16, 'float64': np.float64, 'bool': bool, - 'string': str, 'complex128': np.complex128, 'int16': np.int16} - -class CastTransform(): - def __init__(self, dtype='float32'): - assert dtype in np_dtype_map.keys(), 'Unknown dtype' - self.dtype = dtype - - def __call__(self, sample): - image, label = sample - image = image.astype(np_dtype_map[self.dtype]) - return (image, label) - -class ComposeTransform(): - def __init__(self, transform_list): - self.transform_list = transform_list - - def __call__(self, sample): - for transform in self.transform_list: - sample = transform(sample) - return sample - -class COCOmAPv2(): - """Compute mean average precision of the detection task.""" - - def __init__(self, - anno_path=None, - iou_thrs='0.5:0.05:0.95', - map_points=101, - map_key='DetectionBoxes_Precision/mAP', - output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): - """Initialize the metric. - Args: - anno_path: The path of annotation file. - iou_thrs: Minimal value for intersection over union that allows to make decision - that prediction bounding box is true positive. You can specify one float value - between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. - Defaults to 'DetectionBoxes_Precision/mAP'. - output_index_mapping: The output index mapping. - Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. - """ - self.output_index_mapping = output_index_mapping - from coco_label_map import category_map - if anno_path: - assert os.path.exists(anno_path), 'Annotation path does not exists!' - with open(anno_path, 'r') as f: - label_map = yaml.safe_load(f.read()) - self.category_map_reverse = {k: v for k,v in label_map.items()} - else: - # label: index - self.category_map_reverse = {v: k for k, v in category_map.items()} - self.image_ids = [] - self.ground_truth_list = [] - self.detection_list = [] - self.annotation_id = 1 - self.category_map = category_map - self.category_id_set = set( - [cat for cat in self.category_map]) #index - self.iou_thrs = iou_thrs - self.map_points = map_points - self.map_key = map_key - - def update(self, predicts, labels, sample_weight=None): - """Add the predictions and labels. - Args: - predicts: The predictions. - labels: The labels corresponding to the predictions. - sample_weight: The sample weight. Defaults to None. - """ - from coco_tools import ExportSingleImageGroundtruthToCoco,\ - ExportSingleImageDetectionBoxesToCoco - detections = [] - if 'num_detections' in self.output_index_mapping and \ - self.output_index_mapping['num_detections'] > -1: - for item in zip(*predicts): - detection = {} - num = int(item[self.output_index_mapping['num_detections']]) - detection['boxes'] = np.asarray( - item[self.output_index_mapping['boxes']])[0:num] - detection['scores'] = np.asarray( - item[self.output_index_mapping['scores']])[0:num] - detection['classes'] = np.asarray( - item[self.output_index_mapping['classes']])[0:num] - detections.append(detection) - else: - for item in zip(*predicts): - detection = {} - detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) - detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) - detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) - detections.append(detection) - - bboxes, str_labels,int_labels, image_ids = labels - labels = [] - if len(int_labels[0]) == 0: - for str_label in str_labels: - str_label = [ - x if type(x) == 'str' else x.decode('utf-8') - for x in str_label - ] - labels.append([self.category_map_reverse[x] for x in str_label]) - elif len(str_labels[0]) == 0: - for int_label in int_labels: - labels.append([x for x in int_label]) - - for idx, image_id in enumerate(image_ids): - image_id = image_id if type( - image_id) == 'str' else image_id.decode('utf-8') - if image_id in self.image_ids: - continue - self.image_ids.append(image_id) - - ground_truth = {} - ground_truth['boxes'] = np.asarray(bboxes[idx]) - ground_truth['classes'] = np.asarray(labels[idx]) - - self.ground_truth_list.extend( - ExportSingleImageGroundtruthToCoco( - image_id=image_id, - next_annotation_id=self.annotation_id, - category_id_set=self.category_id_set, - groundtruth_boxes=ground_truth['boxes'], - groundtruth_classes=ground_truth['classes'])) - self.annotation_id += ground_truth['boxes'].shape[0] - - self.detection_list.extend( - ExportSingleImageDetectionBoxesToCoco( - image_id=image_id, - category_id_set=self.category_id_set, - detection_boxes=detections[idx]['boxes'], - detection_scores=detections[idx]['scores'], - detection_classes=detections[idx]['classes'])) 
- - def reset(self): - """Reset the prediction and labels.""" - self.image_ids = [] - self.ground_truth_list = [] - self.detection_list = [] - self.annotation_id = 1 - - def result(self): - """Compute mean average precision. - Returns: - The mean average precision score. - """ - from coco_tools import COCOWrapper, COCOEvalWrapper - if len(self.ground_truth_list) == 0: - return 0 - else: - groundtruth_dict = { - 'annotations': - self.ground_truth_list, - 'images': [{ - 'id': image_id - } for image_id in self.image_ids], - 'categories': [{ - 'id': k, - 'name': v - } for k, v in self.category_map.items()] - } - coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) - coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( - self.detection_list) - box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, - coco_wrapped_detections, - agnostic_mode=False, - iou_thrs = self.iou_thrs, - map_points = self.map_points) - box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( - include_metrics_per_category=False, all_metrics_per_category=False) - box_metrics.update(box_per_category_ap) - box_metrics = { - 'DetectionBoxes_' + key: value - for key, value in iter(box_metrics.items()) - } - - return box_metrics[self.map_key] - -class Post: - def __call__(self, sample): - preds, labels = sample - preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] - return preds, labels - -class LabelBalanceCOCORawFilter(object): - """The label balance filter for COCO raw data.""" - - def __init__(self, size=1): - """Initialize the attribute of class.""" - self.size = size - - def __call__(self, image, label): - """Execute the filter. - - Args: - image: Not used. - label: label of a sample. - """ +import numpy as np +import collections +from PIL import Image +import os +import yaml +from pycocotools.coco import COCO +import cv2 + +class SequentialSampler(): + def __init__(self, dataset): + self.whole_dataset = dataset + + def __iter__(self): + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) + + def __len__(self): + return len(self.whole_dataset) + +class BatchSampler(): + def __init__(self, sampler, batch_size, drop_last=True): + if isinstance(drop_last, bool): + self.drop_last = drop_last + else: + raise ValueError("last_batch only support bool as input") + + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self): + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size + +class IndexFetcher(): + def __init__(self, dataset, collate_fn, drop_last): + self.dataset = dataset + self.collate_fn = collate_fn + self.drop_last = drop_last + + def __call__(self, batched_indices): + data = [self.dataset[idx] for idx in batched_indices] + return self.collate_fn(data) + + +def default_collate(batch): + """Merge data with outer dimension batch size.""" + elem = batch[0] + if isinstance(elem, collections.abc.Mapping): + return {key: default_collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, collections.abc.Sequence): + batch = zip(*batch) + 
return [default_collate(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + try: + return np.stack(batch) + except: + return batch + else: + return batch + +class COCORawDataloader(): + def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False): + self.dataset = dataset + self.last_batch = last_batch + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.collate_fn = collate_fn + self.batch_size = batch_size + self.shuffle = shuffle + self.drop_last = False if last_batch == 'rollover' else True + if self.collate_fn == None: + self.collate_fn = default_collate + + def __iter__(self): + """Yield data in iterative order.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + collate_fn=self.collate_fn, + sampler=self.sampler, + batch_sampler=self.batch_sampler, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=self.shuffle) + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle): + + sampler = self._generate_sampler(dataset) + self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) + self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) + + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + return + + def _generate_sampler(self, dataset): + if hasattr(dataset, "__getitem__"): + self.dataset_type = 'index' + return SequentialSampler(dataset) + else: + raise ValueError("dataset type only support (index, iter)") + + +class COCORawDataset(): + """Coco raw dataset. + Please arrange data in this way: + root + ├── 1.jpg + ├── 2.jpg + ... + └── n.jpg + anno_dir + Please use Resize transform when batch_size > 1 + Args: root (str): Root directory of dataset. + anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + + def __init__(self, root, \ + anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + """Initialize the attributes of class.""" + self.batch_size = 1 + self.image_list = [] + self.transform = transform + img_path = root + anno_path = os.path.join(os.path.dirname(root), anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ + bbox[2]/float(pic_width), bbox[3]/float(pic_height)] + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) + labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + + with Image.open(img_file) as image: + image = np.array(image.convert('RGB')) + self.image_list.append( + (image, [np.array(bboxes), np.array(labels), np.array([]),\ + np.array(img_detail['file_name'].encode('utf-8'))])) + + def __len__(self): + """Length of the dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Magic method. + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + if self.transform is not None: + sample= self.transform(sample) + return sample + +interpolation_map = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, +} + +class ResizeTransform(): + def __init__(self, size, interpolation='bilinear'): + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + image, label = sample + image = cv2.resize(image, self.size, interpolation=self.interpolation) + if len(image.shape) == 2: + image = np.expand_dims(image, -1) + return (image, label) + +class RescaleTransform(): + """Scale the values of image to [0,1]. + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. 
+ return (image, label) + +class NormalizeTransform(): + def __init__(self, mean=[0.0], std=[1.0]): + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + image, label = sample + assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + image = (image - self.mean) / self.std + return (image, label) + +class TransposeTransform(): + def __init__(self, perm): + self.perm = perm + + def __call__(self, sample): + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = np.transpose(image, axes=self.perm) + return (image, label) + +np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, + 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, + 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, + 'float16': np.float16, 'float64': np.float64, 'bool': bool, + 'string': str, 'complex128': np.complex128, 'int16': np.int16} + +class CastTransform(): + def __init__(self, dtype='float32'): + assert dtype in np_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + image, label = sample + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +class ComposeTransform(): + def __init__(self, transform_list): + self.transform_list = transform_list + + def __call__(self, sample): + for transform in self.transform_list: + sample = transform(sample) + return sample + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] + return preds, labels + +class LabelBalanceCOCORawFilter(object): + """The label balance filter for COCO raw data.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ return len(label) == self.size \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/label_map.yaml b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/label_map.yaml similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/label_map.yaml rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/label_map.yaml diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/main.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt diff --git 
a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_quant.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/ssd_mobilenet_v1/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/README.md diff --git a/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..e84acdd99c9 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..1eebfcb5ff5 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
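For reference, a minimal call of the two per-image export helpers defined here, using toy arrays (all values are illustrative; boxes follow the [ymin, xmin, ymax, xmax] convention expected by _ConvertBoxToCOCOFormat):

    import numpy as np

    gt_annotations = ExportSingleImageGroundtruthToCoco(
        image_id='example.jpg',
        next_annotation_id=1,
        category_id_set={1, 18},                 # e.g. person, dog
        groundtruth_boxes=np.array([[10., 20., 110., 220.]], dtype=np.float32),
        groundtruth_classes=np.array([18]),
    )
    detections = ExportSingleImageDetectionBoxesToCoco(
        image_id='example.jpg',
        category_id_set={1, 18},
        detection_boxes=np.array([[12., 18., 108., 225.]], dtype=np.float32),
        detection_scores=np.array([0.9], dtype=np.float32),
        detection_classes=np.array([18]),
    )
    # Both return lists of COCO-format dicts that COCOmAPv2 accumulates across images.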
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/label_map.yaml b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/label_map.yaml similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/label_map.yaml rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/label_map.yaml diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/main.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/README.md similarity index 100% rename from 
examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/README.md diff --git a/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..e84acdd99c9 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..1eebfcb5ff5 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. +TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. 
+ Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. 
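To make the iou_thrs and map_points handling described above concrete, a short sketch of the standard COCO configuration (this mirrors how COCOmAPv2.result() constructs the wrapper in these examples; the variable names are taken from that method):

    # Standard COCO evaluation: IoU thresholds 0.5:0.05:0.95 and 101-point interpolated AP.
    evaluator = COCOEvalWrapper(
        coco_wrapped_groundtruth,      # COCOWrapper built from the accumulated ground truth
        coco_wrapped_detections,       # returned by coco_wrapped_groundtruth.LoadAnnotations(...)
        agnostic_mode=False,
        iou_thrs='0.5:0.05:0.95',      # -> params.iouThrs = [0.5, 0.55, ..., 0.95]
        map_points=101,                # -> params.recThrs = [0.0, 0.01, ..., 1.0]
    )
    summary_metrics, per_category_ap = evaluator.ComputeMetrics()
    print(summary_metrics['Precision/mAP'])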
+ """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
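+    Example (a minimal sketch; the box, score and class id values below are
+      made-up illustrations, not outputs of a real model):
+        detections_list = ExportSingleImageDetectionBoxesToCoco(
+            image_id=1,
+            category_id_set={1},
+            detection_boxes=np.array([[0., 0., 100., 100.]], dtype=np.float32),
+            detection_scores=np.array([0.9], dtype=np.float32),
+            detection_classes=np.array([1], dtype=np.int32))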
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/label_map.yaml b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/label_map.yaml similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/label_map.yaml rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/label_map.yaml diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/main.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/README.md rename to 
examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/README.md diff --git a/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..e84acdd99c9 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..1eebfcb5ff5 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. +TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. 
+ Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. 
+ """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/label_map.yaml b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/label_map.yaml similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/label_map.yaml rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/label_map.yaml diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py similarity index 99% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py index 8839afa6758..99a41882f6b 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py +++ b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/main.py @@ -99,7 +99,7 @@ def get_anchors(anchors_path, tiny=False): return anchors.reshape(3, 3, 2) IMAGE_INPUT_SZIE = 416 -ANCHORS = get_anchors("./yolov4_anchors.txt") +ANCHORS = get_anchors("yolov4_anchors.txt") STRIDES = np.array([8, 16, 32]) XYSCALE = [1.2, 1.1, 1.05] diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_quant.sh similarity index 100% rename from 
examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/yolov4_anchors.txt b/examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/yolov4_anchors.txt similarity index 100% rename from examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/yolov4_anchors.txt rename to examples/deprecated/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/yolov4_anchors.txt diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/README.md diff --git a/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..e84acdd99c9 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..1eebfcb5ff5 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. 
+TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotations results + Raises: + ValueError: if (1) annotations is not a list or annotations do not + correspond to the images contained in self. 
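COCOWrapper mirrors coco.COCO's JSON loading but takes an in-memory dict, so evaluation never touches disk. The dict shape it expects is the one COCOmAPv2.result() assembles later in this diff; a minimal sketch with one image and one box (all IDs and coordinates are illustrative):

from coco_tools import COCOWrapper

groundtruth_dict = {
    'images': [{'id': '000000397133.jpg'}],
    'categories': [{'id': 18, 'name': 'dog'}],
    'annotations': [{
        'id': 1,
        'image_id': '000000397133.jpg',
        'category_id': 18,
        'bbox': [100.0, 50.0, 40.0, 30.0],   # COCO [xmin, ymin, width, height]
        'area': 1200.0,
        'iscrowd': 0,
    }],
}
gt = COCOWrapper(groundtruth_dict)            # builds the pycocotools index in memory

detections_list = [{'image_id': '000000397133.jpg', 'category_id': 18,
                    'bbox': [102.0, 48.0, 38.0, 33.0], 'score': 0.9}]
detections = gt.LoadAnnotations(detections_list)   # fills in area/id/iscrowd, returns a coco.COCO
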
+ """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal value for intersection over union that allows to + make decision that prediction bounding box is true positive. + You can specify one float value between 0 to 1 or + string "05:0.05:0.95" for standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. 
+ Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
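The accumulate() override above reimplements pycocotools accumulation so the 101-point, 11-point, and area-under-curve variants share one code path. Its core arithmetic on the score-sorted detections is easiest to follow on a toy case (illustrative numbers; npig is the count of non-ignored groundtruth boxes):

import numpy as np

# 4 detections sorted by score; 1 = true positive, 0 = false positive; 3 groundtruth boxes.
tps = np.array([1, 0, 1, 1])
fps = 1 - tps
npig = 3

tp_sum = np.cumsum(tps).astype(np.float32)        # [1, 1, 2, 3]
fp_sum = np.cumsum(fps).astype(np.float32)        # [0, 1, 1, 1]
rc = tp_sum / npig                                # recall after each detection
pr = tp_sum / (fp_sum + tp_sum + np.spacing(1))   # precision after each detection

# Precision envelope: make precision non-increasing from right to left,
# exactly as the loop in accumulate() does before interpolating or integrating.
for i in range(pr.size - 1, 0, -1):
    pr[i - 1] = np.maximum(pr[i - 1], pr[i])
print(rc)   # [0.333 0.333 0.667 1.0]
print(pr)   # [1.0   0.75  0.75  0.75]
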
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
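_ConvertBoxToCOCOFormat flips the internal [ymin, xmin, ymax, xmax] layout into COCO's [xmin, ymin, width, height]; one concrete box (values illustrative) makes the index shuffle explicit:

import numpy as np

box = np.array([50.0, 100.0, 80.0, 140.0])             # [ymin, xmin, ymax, xmax]
coco_box = [float(box[1]), float(box[0]),
            float(box[3] - box[1]), float(box[2] - box[0])]
print(coco_box)                                         # [100.0, 50.0, 40.0, 30.0]
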
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to the + ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scored for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
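The two exporters must be fed the same image_id so that groundtruth and detections line up inside pycocotools. A minimal per-image sketch with one box each (shapes, IDs, and values are illustrative; category_id_set is restricted to two classes):

import numpy as np
from coco_tools import (ExportSingleImageGroundtruthToCoco,
                        ExportSingleImageDetectionBoxesToCoco)

category_id_set = {1, 18}
gt_list = ExportSingleImageGroundtruthToCoco(
    image_id='img_0001',
    next_annotation_id=1,
    category_id_set=category_id_set,
    groundtruth_boxes=np.array([[50.0, 100.0, 80.0, 140.0]], dtype=np.float32),  # [ymin, xmin, ymax, xmax]
    groundtruth_classes=np.array([18]))
det_list = ExportSingleImageDetectionBoxesToCoco(
    image_id='img_0001',
    category_id_set=category_id_set,
    detection_boxes=np.array([[48.0, 102.0, 81.0, 139.0]], dtype=np.float32),
    detection_scores=np.array([0.9]),
    detection_classes=np.array([18]))
print(gt_list[0]['bbox'], det_list[0]['score'])   # [100.0, 50.0, 40.0, 30.0] 0.9
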
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py similarity index 97% rename from examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py index 04098e73f09..309e5bed882 100644 --- a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py +++ b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/data_utils.py @@ -1,470 +1,470 @@ -import numpy as np -import collections -from PIL import Image -import os -import yaml -from pycocotools.coco import COCO -import cv2 - -class SequentialSampler(): - def __init__(self, dataset): - self.whole_dataset = dataset - - def __iter__(self): - self.process_rank = 0 # The default rank is 0, which represents the main process - self.process_size = 1 # By default, process_size=1, only the main process is running - return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) - - def __len__(self): - return len(self.whole_dataset) - -class BatchSampler(): - def __init__(self, sampler, batch_size, drop_last=True): - if isinstance(drop_last, bool): - self.drop_last = drop_last - else: - raise ValueError("last_batch only support bool as input") - - self.sampler = sampler - self.batch_size = batch_size - self.drop_last = drop_last - - def __iter__(self): - batch = [] - for idx in self.sampler: - batch.append(idx) - if len(batch) == self.batch_size: - yield batch - batch = [] - if len(batch) > 0 and not self.drop_last: - yield batch - - def __len__(self): - if self.drop_last: - return len(self.sampler) // self.batch_size - else: - return (len(self.sampler) + self.batch_size - 1) // self.batch_size - -class IndexFetcher(): - def __init__(self, dataset, collate_fn, drop_last): - self.dataset = dataset - self.collate_fn = collate_fn - self.drop_last = drop_last - - def __call__(self, batched_indices): - data = [self.dataset[idx] for idx in batched_indices] - return self.collate_fn(data) - - -def default_collate(batch): - """Merge data with outer dimension batch size.""" - elem = batch[0] - if isinstance(elem, collections.abc.Mapping): - return {key: default_collate([d[key] for d in batch]) for key in elem} - elif isinstance(elem, collections.abc.Sequence): - batch = zip(*batch) - return [default_collate(samples) for samples in batch] - elif isinstance(elem, np.ndarray): - try: - return np.stack(batch) - except: - return batch - else: - return batch - -class COCORawDataloader(): - def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, - shuffle=False): - self.dataset = dataset - self.last_batch = last_batch - self.sampler = sampler - self.batch_sampler = batch_sampler - self.num_workers = num_workers - self.pin_memory = pin_memory - self.collate_fn = 
collate_fn - self.batch_size = batch_size - self.shuffle = shuffle - self.drop_last = False if last_batch == 'rollover' else True - if self.collate_fn == None: - self.collate_fn = default_collate - - def __iter__(self): - """Yield data in iterative order.""" - return self._generate_dataloader( - self.dataset, - batch_size=self.batch_size, - last_batch=self.last_batch, - collate_fn=self.collate_fn, - sampler=self.sampler, - batch_sampler=self.batch_sampler, - num_workers=self.num_workers, - pin_memory=self.pin_memory, - shuffle=self.shuffle) - - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, - batch_sampler, num_workers, pin_memory, shuffle): - - sampler = self._generate_sampler(dataset) - self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) - self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) - - for batched_indices in self.batch_sampler: - try: - data = self.fetcher(batched_indices) - yield data - except StopIteration: - return - - def _generate_sampler(self, dataset): - if hasattr(dataset, "__getitem__"): - self.dataset_type = 'index' - return SequentialSampler(dataset) - else: - raise ValueError("dataset type only support (index, iter)") - - -class COCORawDataset(): - """Coco raw dataset. - Please arrange data in this way: - root - ├── 1.jpg - ├── 2.jpg - ... - └── n.jpg - anno_dir - Please use Resize transform when batch_size > 1 - Args: root (str): Root directory of dataset.. - anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. - transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according - to specific conditions. - """ - - def __init__(self, root, \ - anno_dir='annotations/instances_val2017.json', transform=None, filter=None): - """Initialize the attributes of class.""" - self.batch_size = 1 - self.image_list = [] - self.transform = transform - img_path = root - anno_path = os.path.join(os.path.dirname(root), anno_dir) - coco = COCO(anno_path) - img_ids = coco.getImgIds() - cat_ids = coco.getCatIds() - for idx, img_id in enumerate(img_ids): - img_info = {} - bboxes = [] - labels = [] - ids = [] - img_detail = coco.loadImgs(img_id)[0] - ids.append(img_detail['file_name'].encode('utf-8')) - pic_height = img_detail['height'] - pic_width = img_detail['width'] - - ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) - anns = coco.loadAnns(ann_ids) - for ann in anns: - bbox = ann['bbox'] - if len(bbox) == 0: - continue - bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ - bbox[2]/float(pic_width), bbox[3]/float(pic_height)] - bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) - labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) - img_file = os.path.join(img_path, img_detail['file_name']) - if not os.path.exists(img_file) or len(bboxes) == 0: - continue - - if filter and not filter(None, bboxes): - continue - - with Image.open(img_file) as image: - image = np.array(image.convert('RGB')) - self.image_list.append( - (image, [np.array(bboxes), np.array(labels), np.array([]),\ - np.array(img_detail['file_name'].encode('utf-8'))])) - - def __len__(self): - """Length of the dataset.""" - return len(self.image_list) - - def __getitem__(self, index): - """Magic method. 
- x[i] is roughly equivalent to type(x).__getitem__(x, index) - """ - sample = self.image_list[index] - if self.transform is not None: - sample= self.transform(sample) - return sample - -interpolation_map = { - 'nearest': cv2.INTER_NEAREST, - 'bilinear': cv2.INTER_LINEAR, - 'bicubic': cv2.INTER_CUBIC, -} - -class ResizeTransform(): - def __init__(self, size, interpolation='bilinear'): - if isinstance(size, int): - self.size = size, size - elif isinstance(size, list): - if len(size) == 1: - self.size = size[0], size[0] - elif len(size) == 2: - self.size = size[0], size[1] - - if interpolation in interpolation_map.keys(): - self.interpolation = interpolation_map[interpolation] - else: - raise ValueError("Undefined interpolation type") - - def __call__(self, sample): - image, label = sample - image = cv2.resize(image, self.size, interpolation=self.interpolation) - if len(image.shape) == 2: - image = np.expand_dims(image, -1) - return (image, label) - -class RescaleTransform(): - """Scale the values of image to [0,1]. - Returns: - tuple of processed image and label - """ - - def __call__(self, sample): - """Scale the values of the image in sample.""" - image, label = sample - if isinstance(image, np.ndarray): - image = image.astype('float32') / 255. - return (image, label) - -class NormalizeTransform(): - def __init__(self, mean=[0.0], std=[1.0]): - self.mean = mean - self.std = std - for item in self.std: - if item < 10**-6: - raise ValueError("Std should be greater than 0") - - def __call__(self, sample): - image, label = sample - assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' - image = (image - self.mean) / self.std - return (image, label) - -class TransposeTransform(): - def __init__(self, perm): - self.perm = perm - - def __call__(self, sample): - image, label = sample - assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" - image = np.transpose(image, axes=self.perm) - return (image, label) - -np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, - 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, - 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, - 'float16': np.float16, 'float64': np.float64, 'bool': bool, - 'string': str, 'complex128': np.complex128, 'int16': np.int16} - -class CastTransform(): - def __init__(self, dtype='float32'): - assert dtype in np_dtype_map.keys(), 'Unknown dtype' - self.dtype = dtype - - def __call__(self, sample): - image, label = sample - image = image.astype(np_dtype_map[self.dtype]) - return (image, label) - -class ComposeTransform(): - def __init__(self, transform_list): - self.transform_list = transform_list - - def __call__(self, sample): - for transform in self.transform_list: - sample = transform(sample) - return sample - -class COCOmAPv2(): - """Compute mean average precision of the detection task.""" - - def __init__(self, - anno_path=None, - iou_thrs='0.5:0.05:0.95', - map_points=101, - map_key='DetectionBoxes_Precision/mAP', - output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): - """Initialize the metric. - Args: - anno_path: The path of annotation file. - iou_thrs: Minimal value for intersection over union that allows to make decision - that prediction bounding box is true positive. You can specify one float value - between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. - Defaults to 'DetectionBoxes_Precision/mAP'. - output_index_mapping: The output index mapping. - Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. - """ - self.output_index_mapping = output_index_mapping - from coco_label_map import category_map - if anno_path: - assert os.path.exists(anno_path), 'Annotation path does not exists!' - with open(anno_path, 'r') as f: - label_map = yaml.safe_load(f.read()) - self.category_map_reverse = {k: v for k,v in label_map.items()} - else: - # label: index - self.category_map_reverse = {v: k for k, v in category_map.items()} - self.image_ids = [] - self.ground_truth_list = [] - self.detection_list = [] - self.annotation_id = 1 - self.category_map = category_map - self.category_id_set = set( - [cat for cat in self.category_map]) #index - self.iou_thrs = iou_thrs - self.map_points = map_points - self.map_key = map_key - - def update(self, predicts, labels, sample_weight=None): - """Add the predictions and labels. - Args: - predicts: The predictions. - labels: The labels corresponding to the predictions. - sample_weight: The sample weight. Defaults to None. - """ - from coco_tools import ExportSingleImageGroundtruthToCoco,\ - ExportSingleImageDetectionBoxesToCoco - detections = [] - if 'num_detections' in self.output_index_mapping and \ - self.output_index_mapping['num_detections'] > -1: - for item in zip(*predicts): - detection = {} - num = int(item[self.output_index_mapping['num_detections']]) - detection['boxes'] = np.asarray( - item[self.output_index_mapping['boxes']])[0:num] - detection['scores'] = np.asarray( - item[self.output_index_mapping['scores']])[0:num] - detection['classes'] = np.asarray( - item[self.output_index_mapping['classes']])[0:num] - detections.append(detection) - else: - for item in zip(*predicts): - detection = {} - detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) - detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) - detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) - detections.append(detection) - - bboxes, str_labels,int_labels, image_ids = labels - labels = [] - if len(int_labels[0]) == 0: - for str_label in str_labels: - str_label = [ - x if type(x) == 'str' else x.decode('utf-8') - for x in str_label - ] - labels.append([self.category_map_reverse[x] for x in str_label]) - elif len(str_labels[0]) == 0: - for int_label in int_labels: - labels.append([x for x in int_label]) - - for idx, image_id in enumerate(image_ids): - image_id = image_id if type( - image_id) == 'str' else image_id.decode('utf-8') - if image_id in self.image_ids: - continue - self.image_ids.append(image_id) - - ground_truth = {} - ground_truth['boxes'] = np.asarray(bboxes[idx]) - ground_truth['classes'] = np.asarray(labels[idx]) - - self.ground_truth_list.extend( - ExportSingleImageGroundtruthToCoco( - image_id=image_id, - next_annotation_id=self.annotation_id, - category_id_set=self.category_id_set, - groundtruth_boxes=ground_truth['boxes'], - groundtruth_classes=ground_truth['classes'])) - self.annotation_id += ground_truth['boxes'].shape[0] - - self.detection_list.extend( - ExportSingleImageDetectionBoxesToCoco( - image_id=image_id, - category_id_set=self.category_id_set, - detection_boxes=detections[idx]['boxes'], - detection_scores=detections[idx]['scores'], - detection_classes=detections[idx]['classes'])) 
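One small behavior of the data_utils.py being moved here that is easy to miss: COCORawDataloader maps last_batch='rollover' to drop_last=False, so a trailing short batch is kept, while any other value drops it. The batch count follows directly from BatchSampler.__len__; a toy check assuming 10 samples:

n, batch_size = 10, 4
rollover_batches = (n + batch_size - 1) // batch_size   # drop_last=False -> 3 batches: 4, 4, 2
dropped_batches = n // batch_size                       # drop_last=True  -> 2 batches: 4, 4
print(rollover_batches, dropped_batches)                # 3 2
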
- - def reset(self): - """Reset the prediction and labels.""" - self.image_ids = [] - self.ground_truth_list = [] - self.detection_list = [] - self.annotation_id = 1 - - def result(self): - """Compute mean average precision. - Returns: - The mean average precision score. - """ - from coco_tools import COCOWrapper, COCOEvalWrapper - if len(self.ground_truth_list) == 0: - return 0 - else: - groundtruth_dict = { - 'annotations': - self.ground_truth_list, - 'images': [{ - 'id': image_id - } for image_id in self.image_ids], - 'categories': [{ - 'id': k, - 'name': v - } for k, v in self.category_map.items()] - } - coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) - coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( - self.detection_list) - box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, - coco_wrapped_detections, - agnostic_mode=False, - iou_thrs = self.iou_thrs, - map_points = self.map_points) - box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( - include_metrics_per_category=False, all_metrics_per_category=False) - box_metrics.update(box_per_category_ap) - box_metrics = { - 'DetectionBoxes_' + key: value - for key, value in iter(box_metrics.items()) - } - - return box_metrics[self.map_key] - -class Post: - def __call__(self, sample): - preds, labels = sample - preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] - return preds, labels - -class LabelBalanceCOCORawFilter(object): - """The label balance filter for COCO raw data.""" - - def __init__(self, size=1): - """Initialize the attribute of class.""" - self.size = size - - def __call__(self, image, label): - """Execute the filter. - - Args: - image: Not used. - label: label of a sample. - """ +import numpy as np +import collections +from PIL import Image +import os +import yaml +from pycocotools.coco import COCO +import cv2 + +class SequentialSampler(): + def __init__(self, dataset): + self.whole_dataset = dataset + + def __iter__(self): + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) + + def __len__(self): + return len(self.whole_dataset) + +class BatchSampler(): + def __init__(self, sampler, batch_size, drop_last=True): + if isinstance(drop_last, bool): + self.drop_last = drop_last + else: + raise ValueError("last_batch only support bool as input") + + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self): + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size + +class IndexFetcher(): + def __init__(self, dataset, collate_fn, drop_last): + self.dataset = dataset + self.collate_fn = collate_fn + self.drop_last = drop_last + + def __call__(self, batched_indices): + data = [self.dataset[idx] for idx in batched_indices] + return self.collate_fn(data) + + +def default_collate(batch): + """Merge data with outer dimension batch size.""" + elem = batch[0] + if isinstance(elem, collections.abc.Mapping): + return {key: default_collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, collections.abc.Sequence): + batch = zip(*batch) + 
return [default_collate(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + try: + return np.stack(batch) + except: + return batch + else: + return batch + +class COCORawDataloader(): + def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False): + self.dataset = dataset + self.last_batch = last_batch + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.collate_fn = collate_fn + self.batch_size = batch_size + self.shuffle = shuffle + self.drop_last = False if last_batch == 'rollover' else True + if self.collate_fn == None: + self.collate_fn = default_collate + + def __iter__(self): + """Yield data in iterative order.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + collate_fn=self.collate_fn, + sampler=self.sampler, + batch_sampler=self.batch_sampler, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=self.shuffle) + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle): + + sampler = self._generate_sampler(dataset) + self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) + self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) + + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + return + + def _generate_sampler(self, dataset): + if hasattr(dataset, "__getitem__"): + self.dataset_type = 'index' + return SequentialSampler(dataset) + else: + raise ValueError("dataset type only support (index, iter)") + + +class COCORawDataset(): + """Coco raw dataset. + Please arrange data in this way: + root + ├── 1.jpg + ├── 2.jpg + ... + └── n.jpg + anno_dir + Please use Resize transform when batch_size > 1 + Args: root (str): Root directory of dataset.. + anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
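The pieces above (SequentialSampler -> BatchSampler -> IndexFetcher with default_collate) are what give COCORawDataloader its framework-free iteration. A hedged wiring sketch, assuming the COCO val2017 images and annotation file already sit at the placeholder paths shown; the example's run scripts pass the real locations in via CLI flags:

from data_utils import COCORawDataset, COCORawDataloader

dataset = COCORawDataset('/path/to/val2017',                         # placeholder image directory
                         anno_dir='annotations/instances_val2017.json')
dataloader = COCORawDataloader(dataset, batch_size=1)   # per the docstring, add a Resize transform if batch_size > 1
for images, labels in dataloader:
    # labels unpacks as (bboxes, str_labels, int_labels, image_ids) -- the layout COCOmAPv2.update expects
    break
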
+ """ + + def __init__(self, root, \ + anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + """Initialize the attributes of class.""" + self.batch_size = 1 + self.image_list = [] + self.transform = transform + img_path = root + anno_path = os.path.join(os.path.dirname(root), anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ + bbox[2]/float(pic_width), bbox[3]/float(pic_height)] + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) + labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + + with Image.open(img_file) as image: + image = np.array(image.convert('RGB')) + self.image_list.append( + (image, [np.array(bboxes), np.array(labels), np.array([]),\ + np.array(img_detail['file_name'].encode('utf-8'))])) + + def __len__(self): + """Length of the dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Magic method. + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + if self.transform is not None: + sample= self.transform(sample) + return sample + +interpolation_map = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, +} + +class ResizeTransform(): + def __init__(self, size, interpolation='bilinear'): + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + image, label = sample + image = cv2.resize(image, self.size, interpolation=self.interpolation) + if len(image.shape) == 2: + image = np.expand_dims(image, -1) + return (image, label) + +class RescaleTransform(): + """Scale the values of image to [0,1]. + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. 
+ return (image, label) + +class NormalizeTransform(): + def __init__(self, mean=[0.0], std=[1.0]): + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + image, label = sample + assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + image = (image - self.mean) / self.std + return (image, label) + +class TransposeTransform(): + def __init__(self, perm): + self.perm = perm + + def __call__(self, sample): + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = np.transpose(image, axes=self.perm) + return (image, label) + +np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, + 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, + 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, + 'float16': np.float16, 'float64': np.float64, 'bool': bool, + 'string': str, 'complex128': np.complex128, 'int16': np.int16} + +class CastTransform(): + def __init__(self, dtype='float32'): + assert dtype in np_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + image, label = sample + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +class ComposeTransform(): + def __init__(self, transform_list): + self.transform_list = transform_list + + def __call__(self, sample): + for transform in self.transform_list: + sample = transform(sample) + return sample + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal value for intersection over union that allows to make decision + that prediction bounding box is true positive. You can specify one float value + between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that mapping to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exists!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
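The transform classes above compose into the usual HWC-uint8 to CHW-float32 preprocessing chain. A standalone trace on a dummy image; the 300x300 size and mean/std values are illustrative placeholders, not the settings used by the example's main.py:

import numpy as np
from data_utils import (ComposeTransform, ResizeTransform, RescaleTransform,
                        NormalizeTransform, TransposeTransform, CastTransform)

# Dummy HWC uint8 image plus an empty label, just to trace shapes/dtypes through the chain.
sample = (np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8), None)

preprocess = ComposeTransform([
    ResizeTransform([300, 300]),            # HWC, resized with cv2 bilinear interpolation
    RescaleTransform(),                     # values scaled to [0, 1]
    NormalizeTransform(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    TransposeTransform(perm=(2, 0, 1)),     # HWC -> CHW
    CastTransform(dtype='float32'),
])
image, _ = preprocess(sample)
print(image.shape, image.dtype)             # (3, 300, 300) float32
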
+ """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] + return preds, labels + +class LabelBalanceCOCORawFilter(object): + """The label balance filter for COCO raw data.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ return len(label) == self.size \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/main.py diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_quant.sh similarity index 100% rename from 
examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/README.md diff --git a/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py new file mode 100644 index 00000000000..e84acdd99c9 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py @@ -0,0 +1,103 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +"""The dict mapping category IDs to its names of labels.""" + +category_map = { + 1: 'person', + 2: 'bicycle', + 3: 'car', + 4: 'motorcycle', + 5: 'airplane', + 6: 'bus', + 7: 'train', + 8: 'truck', + 9: 'boat', + 10: 'traffic light', + 11: 'fire hydrant', + 13: 'stop sign', + 14: 'parking meter', + 15: 'bench', + 16: 'bird', + 17: 'cat', + 18: 'dog', + 19: 'horse', + 20: 'sheep', + 21: 'cow', + 22: 'elephant', + 23: 'bear', + 24: 'zebra', + 25: 'giraffe', + 27: 'backpack', + 28: 'umbrella', + 31: 'handbag', + 32: 'tie', + 33: 'suitcase', + 34: 'frisbee', + 35: 'skis', + 36: 'snowboard', + 37: 'sports ball', + 38: 'kite', + 39: 'baseball bat', + 40: 'baseball glove', + 41: 'skateboard', + 42: 'surfboard', + 43: 'tennis racket', + 44: 'bottle', + 46: 'wine glass', + 47: 'cup', + 48: 'fork', + 49: 'knife', + 50: 'spoon', + 51: 'bowl', + 52: 'banana', + 53: 'apple', + 54: 'sandwich', + 55: 'orange', + 56: 'broccoli', + 57: 'carrot', + 58: 'hot dog', + 59: 'pizza', + 60: 'donut', + 61: 'cake', + 62: 'chair', + 63: 'couch', + 64: 'potted plant', + 65: 'bed', + 67: 'dining table', + 70: 'toilet', + 72: 'tv', + 73: 'laptop', + 74: 'mouse', + 75: 'remote', + 76: 'keyboard', + 77: 'cell phone', + 78: 'microwave', + 79: 'oven', + 80: 'toaster', + 81: 'sink', + 82: 'refrigerator', + 84: 'book', + 85: 'clock', + 86: 'vase', + 87: 'scissors', + 88: 'teddy bear', + 89: 'hair drier', + 90: 'toothbrush' +} \ No newline at end of file diff --git a/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py new file mode 100644 index 00000000000..1eebfcb5ff5 --- /dev/null +++ b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py @@ -0,0 +1,672 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# 
+# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +"""Wrappers for third party pycocotools to be used within object_detection. +Note that nothing in this file is tensorflow related and thus cannot +be called directly as a slim metric, for example. +TODO(jonathanhuang): wrap as a slim metric in metrics.py +Usage example: given a set of images with ids in the list image_ids +and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) +and detections (boxes, scores and classes), where elements of each list +correspond to detections/annotations of a single image, +then evaluation (in multi-class mode) can be invoked as follows: + groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( + image_ids, groundtruth_boxes_list, groundtruth_classes_list, + max_num_classes, output_path=None) + detections_list = coco_tools.ExportDetectionsToCOCO( + image_ids, detection_boxes_list, detection_scores_list, + detection_classes_list, output_path=None) + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() +""" + +import copy +import time + +import numpy as np + +from collections import OrderedDict +from neural_compressor.utils import logger +from pycocotools import coco +from pycocotools import cocoeval +from pycocotools import mask +from typing import Any, Dict, List, Set, Union + + +class COCOWrapper(coco.COCO): + """Wrapper for the pycocotools COCO class. + + Attributes: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + """ + + def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): + """Construct a COCOWrapper. + See http://mscoco.org/dataset/#format for a description of the format. + By default, the coco.COCO class constructor reads from a JSON file. + This function duplicates the same behavior but loads from a dictionary, + allowing us to perform evaluation without writing to external storage. + Args: + dataset: a dictionary holding bounding box annotations in the COCO format. + detection_type: type of detections being wrapped. Can be one of ['bbox', + 'segmentation'] + Raises: + ValueError: if detection_type is unsupported. + """ + supported_detection_types = ['bbox', 'segmentation'] + if detection_type not in supported_detection_types: + raise ValueError('Unsupported detection type: {}. ' + 'Supported values are: {}'.format( + detection_type, supported_detection_types)) + self._detection_type = detection_type + coco.COCO.__init__(self) + self.dataset = dataset + self.createIndex() + + def LoadAnnotations(self, annotations: list) -> coco.COCO: + """Load annotations dictionary into COCO datastructure. + See http://mscoco.org/dataset/#format for a description of the annotations + format. 
As above, this function replicates the default behavior of the API + but does not require writing to external storage. + Args: + annotations: python list holding object detection results where each + detection is encoded as a dict with required keys ['image_id', + 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on + `detection_type`. + Returns: + a coco.COCO datastructure holding object detection annotation results + Raises: + ValueError: if annotations is not a list or if annotations do not + correspond to the images contained in self. + """ + results = coco.COCO() + results.dataset['images'] = [img for img in self.dataset['images']] + + logger.info("Load and prepare annotation results.") + tic = time.time() + + if not isinstance(annotations, list): + raise ValueError('annotations is not a list of objects') + annotation_img_ids = [ann['image_id'] for ann in annotations] + if (set(annotation_img_ids) != (set(annotation_img_ids) + & set(self.getImgIds()))): + raise ValueError('Results do not correspond to current coco set') + results.dataset['categories'] = copy.deepcopy( + self.dataset['categories']) + if self._detection_type == 'bbox': + for idx, ann in enumerate(annotations): + bb = ann['bbox'] + ann['area'] = bb[2] * bb[3] + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + elif self._detection_type == 'segmentation': + for idx, ann in enumerate(annotations): + ann['area'] = mask.area(ann['segmentation']) + ann['bbox'] = mask.toBbox(ann['segmentation']) + ann['id'] = idx + 1 + ann['iscrowd'] = 0 + logger.info('DONE (t=%0.2fs)', (time.time() - tic)) + + results.dataset['annotations'] = annotations + results.createIndex() + return results + + +class COCOEvalWrapper(cocoeval.COCOeval): + """Wrapper for the pycocotools COCOeval class. + To evaluate, create two objects (groundtruth_dict and detections_list) + using the conventions listed at http://mscoco.org/dataset/#format. + Then call evaluation as follows: + groundtruth = coco_tools.COCOWrapper(groundtruth_dict) + detections = groundtruth.LoadAnnotations(detections_list) + evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, + agnostic_mode=False) + metrics = evaluator.ComputeMetrics() + """ + + def __init__(self, + groundtruth: coco.COCO = None, + detections: coco.COCO = None, + agnostic_mode = False, + iou_type: str = 'bbox', + iou_thrs: Union[str, float] = None, + map_points=None): + """Construct a COCOEvalWrapper. + Note that for the area-based metrics to be meaningful, detection and + groundtruth boxes must be in image coordinates measured in pixels. + Args: + groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding + groundtruth annotations + detections: a coco.COCO (or coco_tools.COCOWrapper) object holding + detections + agnostic_mode: boolean (default: False). If True, evaluation ignores + class labels, treating all detections as proposals. + iou_thrs: Minimal intersection-over-union value required to count a + predicted bounding box as a true positive. + You can specify one float value + between 0 and 1 or + the string "0.5:0.05:0.95" for the standard COCO thresholds. + iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. 
+ """ + cocoeval.COCOeval.__init__(self, + groundtruth, + detections, + iouType=iou_type) + if agnostic_mode: + self.params.useCats = 0 + if iou_thrs == '0.5:0.05:0.95': + self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ + endpoint=True) + elif isinstance(iou_thrs, float): + self.params.iouThrs = [iou_thrs] + + if map_points == 101: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ + endpoint=True) + if map_points == 11: + self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ + endpoint=True) + if map_points == 0: + self.params.recThrs = [-1] + + + def GetCategory(self, category_id: int) -> dict: + """Fetch dictionary holding category information given category id. + Args: + category_id: integer id + Returns: + dictionary holding 'id', 'name'. + """ + return self.cocoGt.cats[category_id] + + def GetAgnosticMode(self) -> bool: + """Return whether COCO Eval is configured to evaluate in agnostic mode.""" + return self.params.useCats == 0 + + def GetCategoryIdList(self) -> List[int]: + """Return the list of IDs of all valid categories.""" + return self.params.catIds + + def accumulate(self, p: cocoeval.Params = None): + """Accumulate evaluation results per image and store it to self.eval. + + Args: + p: input params for evaluation + """ + print('Accumulating evaluation results...') + tic = time.time() + if not self.evalImgs: + print('Please run evaluate() first') + # allows input customized parameters + if p is None: + p = self.params + p.catIds = p.catIds if p.useCats == 1 else [-1] + T = len(p.iouThrs) + R = len(p.recThrs) + K = len(p.catIds) if p.useCats else 1 + A = len(p.areaRng) + M = len(p.maxDets) + precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories + recall = -np.ones((T,K,A,M)) + scores = -np.ones((T,R,K,A,M)) + + # create dictionary for future indexing + _pe = self._paramsEval + print('-pe', _pe) + catIds = _pe.catIds if _pe.useCats else [-1] + setK = set(catIds) + setA = set(map(tuple, _pe.areaRng)) + setM = set(_pe.maxDets) + setI = set(_pe.imgIds) + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds) if k in setK] + m_list = [m for n, m in enumerate(p.maxDets) if m in setM] + a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] + i_list = [n for n, i in enumerate(p.imgIds) if i in setI] + I0 = len(_pe.imgIds) + A0 = len(_pe.areaRng) + # retrieve E at each category, area range, and max number of detections + for k, k0 in enumerate(k_list): + Nk = k0*A0*I0 + for a, a0 in enumerate(a_list): + Na = a0*I0 + for m, maxDet in enumerate(m_list): + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if len(E) == 0: continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] + dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg==0 ) + if npig == 0: continue + tps = np.logical_and( dtm, np.logical_not(dtIg) ) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) + for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): + tp = np.array(tp) + fp = np.array(fp) + nd = len(tp) + rc = tp / npig + pr = tp / (fp+tp+np.spacing(1)) + + # calculate precision + if R == 1: + rc = np.concatenate(([0.], rc, [1.])) + pr = np.concatenate(([0.], pr, [0.])) + + # compute the precision envelope + for i in range(pr.size - 1, 0, -1): + pr[i - 1] = np.maximum(pr[i - 1], pr[i]) + + # to calculate area under PR curve, look for points + # where X axis (recall) changes value + change_point = np.where(rc[1:] != rc[:-1])[0] + # and sum (\Delta recall) * recall + res = np.sum((rc[change_point + 1] - rc[change_point]) \ + * pr[change_point + 1]) + precision[t,:,k,a,m] = np.array([res]) + else: + q = np.zeros((R,)) + + # numpy is slow without cython optimization for accessing elements + # use python array gets significant speed improvement + pr = pr.tolist(); q = q.tolist() + + for i in range(nd-1, 0, -1): + if pr[i] > pr[i-1]: + pr[i-1] = pr[i] + + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + q[ri] = pr[pi] + except: + pass + precision[t,:,k,a,m] = np.array(q) + + # calculate recall + if nd: + recall[t,k,a,m] = rc[-1] + else: + recall[t,k,a,m] = 0 + + # calculate score + ss = np.zeros((R,)) + inds = np.searchsorted(rc, p.recThrs, side='left') + try: + for ri, pi in enumerate(inds): + ss[ri] = dtScoresSorted[pi] + except: + pass + scores[t,:,k,a,m] = np.array(ss) + # exit(0) + self.eval = { + 'params': p, + 'counts': [T, R, K, A, M], + 'precision': precision, + 'recall': recall, + 'scores': scores, + } + toc = time.time() + print('DONE (t={:0.2f}s).'.format( toc-tic)) + + + def ComputeMetrics(self, + include_metrics_per_category: bool = False, + all_metrics_per_category: bool = False): # pragma: no cover + """Compute detection metrics. + Args: + include_metrics_per_category: Whether include metrics per category. + all_metrics_per_category: Whether include all the summery metrics for + each category in per_category_ap. Be careful with setting it to true if + you have more than handful of categories, because it will pollute + your mldash. 
+ Returns: + A tuple of (summary_metrics, per_category_ap), in which + (1) summary_metrics is a dictionary holding: + 'Precision/mAP': mean average precision over classes averaged over IOU + thresholds ranging from .5 to .95 with .05 increments; + 'Precision/mAP@.50IOU': mean average precision at 50% IOU; + 'Precision/mAP@.75IOU': mean average precision at 75% IOU; + 'Precision/mAP (small)': mean average precision for small objects + (area < 32^2 pixels); + 'Precision/mAP (medium)': mean average precision for medium sized + objects (32^2 pixels < area < 96^2 pixels); + 'Precision/mAP (large)': mean average precision for large objects + (96^2 pixels < area < 10000^2 pixels); + 'Recall/AR@1': average recall with 1 detection; + 'Recall/AR@10': average recall with 10 detections; + 'Recall/AR@100': average recall with 100 detections; + 'Recall/AR@100 (small)': average recall for small objects with 100 + detections; + 'Recall/AR@100 (medium)': average recall for medium objects with 100 + detections; + 'Recall/AR@100 (large)': average recall for large objects with 100 + detections; + and (2) per_category_ap is a dictionary holding category specific results with + keys of the form: 'Precision mAP ByCategory/category' + (without the supercategory part if no supercategories exist). + For backward compatibility 'PerformanceByCategory' is included in the + output regardless of all_metrics_per_category. If evaluating class-agnostic + mode, per_category_ap is an empty dictionary. + Raises: + ValueError: If category_stats does not exist. + """ + self.evaluate() + self.accumulate() + self.summarize() + + summary_metrics = OrderedDict([ + ('Precision/mAP', self.stats[0]), + ('Precision/mAP@.50IOU', self.stats[1]), + ('Precision/mAP@.75IOU', self.stats[2]), + ('Precision/mAP (small)', self.stats[3]), + ('Precision/mAP (medium)', self.stats[4]), + ('Precision/mAP (large)', self.stats[5]), + ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), + ('Recall/AR@100', self.stats[8]), + ('Recall/AR@100 (small)', self.stats[9]), + ('Recall/AR@100 (medium)', self.stats[10]), + ('Recall/AR@100 (large)', self.stats[11]) + ]) + if not include_metrics_per_category: + return summary_metrics, {} + if not hasattr(self, 'category_stats'): + raise ValueError('Category stats do not exist') + per_category_ap = OrderedDict([]) + if self.GetAgnosticMode(): + return summary_metrics, per_category_ap + for category_index, category_id in enumerate(self.GetCategoryIdList()): + category = self.GetCategory(category_id)['name'] + # Kept for backward compatilbility + # pylint: disable=no-member + per_category_ap['PerformanceByCategory/mAP/{}'.format( + category)] = self.category_stats[0][category_index] + if all_metrics_per_category: + per_category_ap['Precision mAP ByCategory/{}'.format( + category)] = self.category_stats[0][category_index] + per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( + category)] = self.category_stats[1][category_index] + per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( + category)] = self.category_stats[2][category_index] + per_category_ap['Precision mAP (small) ByCategory/{}'.format( + category)] = self.category_stats[3][category_index] + per_category_ap['Precision mAP (medium) ByCategory/{}'.format( + category)] = self.category_stats[4][category_index] + per_category_ap['Precision mAP (large) ByCategory/{}'.format( + category)] = self.category_stats[5][category_index] + per_category_ap['Recall AR@1 ByCategory/{}'.format( + category)] = self.category_stats[6][category_index] + 
per_category_ap['Recall AR@10 ByCategory/{}'.format( + category)] = self.category_stats[7][category_index] + per_category_ap['Recall AR@100 ByCategory/{}'.format( + category)] = self.category_stats[8][category_index] + per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( + category)] = self.category_stats[9][category_index] + per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( + category)] = self.category_stats[10][category_index] + per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( + category)] = self.category_stats[11][category_index] + + return summary_metrics, per_category_ap + + +def _ConvertBoxToCOCOFormat(box): + """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. + This is a utility function for converting from our internal + [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API + i.e., [xmin, ymin, width, height]. + Args: + box: a numpy array in format of [ymin, xmin, ymax, xmax] + Returns: + A list of floats, in COCO format, representing [xmin, ymin, width, height] + """ + return [ + float(box[1]), + float(box[0]), + float(box[3] - box[1]), + float(box[2] - box[0]) + ] + + +def _RleCompress(masks): + """Compresses mask using Run-length encoding provided by pycocotools. + Args: + masks: uint8 numpy array of shape [mask_height, mask_width] with values in + {0, 1}. + Returns: + A pycocotools Run-length encoding of the mask. + """ + return mask.encode(np.asfortranarray(masks)) + + +def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], + next_annotation_id: int, + category_id_set: Set[str], + groundtruth_boxes: np.array, + groundtruth_classes: np.array, + groundtruth_masks: Union[np.array, None] = None, + groundtruth_is_crowd: Union[np.array, None] = None) -> list: + """Export groundtruth of a single image to COCO format. + This function converts groundtruth detection annotations represented as numpy + arrays to dictionaries that can be ingested by the COCO evaluation API. Note + that the image_ids provided here must match the ones given to + ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in + correspondence - that is: groundtruth_boxes[i, :], and + groundtruth_classes[i] are associated with the same groundtruth annotation. + In the exported result, "area" fields are always set to the area of the + groundtruth bounding box. + Args: + image_id: a unique image identifier either of type integer or string. + next_annotation_id: integer specifying the first id to use for the + groundtruth annotations. All annotations are assigned a continuous integer + id starting from this value. + category_id_set: A set of valid class ids. Groundtruth with classes not in + category_id_set are dropped. + groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] + groundtruth_classes: numpy array (int) with shape [num_gt_boxes] + groundtruth_masks: optional uint8 numpy array of shape [num_detections, + image_height, image_width] containing detection_masks. + groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] + indicating whether groundtruth boxes are crowd. + Returns: + A list of groundtruth annotations for a single image in the COCO format. 
+ Raises: + ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the + right lengths or (2) if each of the elements inside these lists do not + have the correct shapes or (3) if image_ids are not integers + """ + if len(groundtruth_classes.shape) != 1: + raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') + if len(groundtruth_boxes.shape) != 2: + raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') + if groundtruth_boxes.shape[1] != 4: + raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') + num_boxes = groundtruth_classes.shape[0] + if num_boxes != groundtruth_boxes.shape[0]: + raise ValueError( + 'Corresponding entries in groundtruth_classes, ' + 'and groundtruth_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension).' + 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % + (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], + image_id)) + has_is_crowd = groundtruth_is_crowd is not None + if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: + raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') + groundtruth_list = [] + for i in range(num_boxes): + if groundtruth_classes[i] in category_id_set: + iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 + export_dict = { + 'id': + next_annotation_id + i, + 'image_id': + image_id, + 'category_id': + int(groundtruth_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), + 'area': + float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * + (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), + 'iscrowd': + iscrowd + } + if groundtruth_masks is not None: + export_dict['segmentation'] = _RleCompress( + groundtruth_masks[i]) + groundtruth_list.append(export_dict) + return groundtruth_list + + +def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], + category_id_set: Set[int], + detection_boxes: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detections of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. Note that the image_ids + provided here must match the ones given to + ExportSingleImageGroundtruthToCoco. We assume that boxes, and classes are in + correspondence - that is: boxes[i, :], and classes[i] + are associated with the same groundtruth annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_boxes: float numpy array of shape [num_detections, 4] containing + detection boxes. + detection_scores: float numpy array of shape [num_detections] containing + scores for the detection boxes. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection boxes. + Returns: + A list of detection annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_boxes, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. 
+ """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + if len(detection_boxes.shape) != 2: + raise ValueError('All entries in detection_boxes expected to be of ' + 'rank 2.') + if detection_boxes.shape[1] != 4: + raise ValueError('All entries in detection_boxes should have ' + 'shape[1] == 4.') + num_boxes = detection_classes.shape[0] + if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: + raise ValueError( + 'Corresponding entries in detection_classes, ' + 'detection_scores and detection_boxes should have ' + 'compatible shapes (i.e., agree on the 0th dimension). ' + 'Classes shape: %d. Boxes shape: %d. ' + 'Scores shape: %d' % + (detection_classes.shape[0], detection_boxes.shape[0], + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'bbox': + list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), + 'score': + float(detection_scores[i]) + }) + return detections_list + + +def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], + category_id_set: Set[int], + detection_masks: np.array, + detection_scores: np.array, + detection_classes: np.array) -> list: + """Export detection masks of a single image to COCO format. + This function converts detections represented as numpy arrays to dictionaries + that can be ingested by the COCO evaluation API. We assume that + detection_masks, detection_scores, and detection_classes are in correspondence + - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] + are associated with the same annotation. + Args: + image_id: unique image identifier either of type integer or string. + category_id_set: A set of valid class ids. Detections with classes not in + category_id_set are dropped. + detection_masks: uint8 numpy array of shape [num_detections, image_height, + image_width] containing detection_masks. + detection_scores: float numpy array of shape [num_detections] containing + scores for detection masks. + detection_classes: integer numpy array of shape [num_detections] containing + the classes for detection masks. + Returns: + A list of detection mask annotations for a single image in the COCO format. + Raises: + ValueError: if (1) detection_masks, detection_scores and detection_classes + do not have the right lengths or (2) if each of the elements inside these + lists do not have the correct shapes or (3) if image_ids are not integers. + """ + if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: + raise ValueError( + 'All entries in detection_classes and detection_scores' + 'expected to be of rank 1.') + num_boxes = detection_classes.shape[0] + if not num_boxes == len(detection_masks) == detection_scores.shape[0]: + raise ValueError('Corresponding entries in detection_classes, ' + 'detection_scores and detection_masks should have ' + 'compatible lengths and shapes ' + 'Classes length: %d. Masks length: %d. 
' + 'Scores length: %d' % + (detection_classes.shape[0], len(detection_masks), + detection_scores.shape[0])) + detections_list = [] + for i in range(num_boxes): + if detection_classes[i] in category_id_set: + detections_list.append({ + 'image_id': + image_id, + 'category_id': + int(detection_classes[i]), + 'segmentation': + _RleCompress(detection_masks[i]), + 'score': + float(detection_scores[i]) + }) + return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py similarity index 97% rename from examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py index 31e4ef42e82..413b045f29e 100644 --- a/examples/onnxrt/object_detection/onnx_model_zoo/ssd/quantization/ptq_static/data_utils.py +++ b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/data_utils.py @@ -1,471 +1,470 @@ -import numpy as np -import collections -from PIL import Image -import os -import yaml -from pycocotools.coco import COCO -import cv2 - -class SequentialSampler(): - def __init__(self, dataset): - self.whole_dataset = dataset - - def __iter__(self): - self.process_rank = 0 # The default rank is 0, which represents the main process - self.process_size = 1 # By default, process_size=1, only the main process is running - return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) - - def __len__(self): - return len(self.whole_dataset) - -class BatchSampler(): - def __init__(self, sampler, batch_size, drop_last=True): - if isinstance(drop_last, bool): - self.drop_last = drop_last - else: - raise ValueError("last_batch only support bool as input") - - self.sampler = sampler - self.batch_size = batch_size - self.drop_last = drop_last - - def __iter__(self): - batch = [] - for idx in self.sampler: - batch.append(idx) - if len(batch) == self.batch_size: - yield batch - batch = [] - if len(batch) > 0 and not self.drop_last: - yield batch - - def __len__(self): - if self.drop_last: - return len(self.sampler) // self.batch_size - else: - return (len(self.sampler) + self.batch_size - 1) // self.batch_size - -class IndexFetcher(): - def __init__(self, dataset, collate_fn, drop_last): - self.dataset = dataset - self.collate_fn = collate_fn - self.drop_last = drop_last - - def __call__(self, batched_indices): - data = [self.dataset[idx] for idx in batched_indices] - return self.collate_fn(data) - - -def default_collate(batch): - """Merge data with outer dimension batch size.""" - elem = batch[0] - if isinstance(elem, collections.abc.Mapping): - return {key: default_collate([d[key] for d in batch]) for key in elem} - elif isinstance(elem, collections.abc.Sequence): - batch = zip(*batch) - return [default_collate(samples) for samples in batch] - elif isinstance(elem, np.ndarray): - try: - return np.stack(batch) - except: - return batch - else: - return batch - -class COCORawDataloader(): - def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, - sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, - shuffle=False): - self.dataset = dataset - self.last_batch = last_batch - self.sampler = sampler - self.batch_sampler = batch_sampler - self.num_workers = num_workers - self.pin_memory = pin_memory - self.collate_fn = 
collate_fn - self.batch_size = batch_size - self.shuffle = shuffle - self.drop_last = False if last_batch == 'rollover' else True - if self.collate_fn == None: - self.collate_fn = default_collate - - def __iter__(self): - """Yield data in iterative order.""" - return self._generate_dataloader( - self.dataset, - batch_size=self.batch_size, - last_batch=self.last_batch, - collate_fn=self.collate_fn, - sampler=self.sampler, - batch_sampler=self.batch_sampler, - num_workers=self.num_workers, - pin_memory=self.pin_memory, - shuffle=self.shuffle) - - def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, - batch_sampler, num_workers, pin_memory, shuffle): - - sampler = self._generate_sampler(dataset) - self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) - self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) - - for batched_indices in self.batch_sampler: - try: - data = self.fetcher(batched_indices) - yield data - except StopIteration: - return - - def _generate_sampler(self, dataset): - if hasattr(dataset, "__getitem__"): - self.dataset_type = 'index' - return SequentialSampler(dataset) - else: - raise ValueError("dataset type only support (index, iter)") - - -class COCORawDataset(): - """Coco raw dataset. - Please arrange data in this way: - root - ├── 1.jpg - ├── 2.jpg - ... - └── n.jpg - anno_dir - Please use Resize transform when batch_size > 1 - Args: root (str): Root directory of dataset. - anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. - transform (transform object, default=None): transform to process input data. - filter (Filter objects, default=None): filter out examples according - to specific conditions. - """ - - def __init__(self, root, \ - anno_dir='annotations/instances_val2017.json', transform=None, filter=None): - """Initialize the attributes of class.""" - self.batch_size = 1 - self.image_list = [] - self.transform = transform - img_path = root - anno_path = os.path.join(os.path.dirname(root), anno_dir) - coco = COCO(anno_path) - img_ids = coco.getImgIds() - cat_ids = coco.getCatIds() - for idx, img_id in enumerate(img_ids): - img_info = {} - bboxes = [] - labels = [] - ids = [] - img_detail = coco.loadImgs(img_id)[0] - ids.append(img_detail['file_name'].encode('utf-8')) - pic_height = img_detail['height'] - pic_width = img_detail['width'] - - ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) - anns = coco.loadAnns(ann_ids) - for ann in anns: - bbox = ann['bbox'] - if len(bbox) == 0: - continue - bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ - bbox[2]/float(pic_width), bbox[3]/float(pic_height)] - bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) - labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) - img_file = os.path.join(img_path, img_detail['file_name']) - if not os.path.exists(img_file) or len(bboxes) == 0: - continue - - if filter and not filter(None, bboxes): - continue - - with Image.open(img_file) as image: - image = np.array(image.convert('RGB')) - self.image_list.append( - (image, [np.array(bboxes), np.array(labels), np.array([]),\ - np.array(img_detail['file_name'].encode('utf-8'))])) - - def __len__(self): - """Length of the dataset.""" - return len(self.image_list) - - def __getitem__(self, index): - """Magic method. 
- x[i] is roughly equivalent to type(x).__getitem__(x, index) - """ - sample = self.image_list[index] - if self.transform is not None: - sample= self.transform(sample) - return sample - -interpolation_map = { - 'nearest': cv2.INTER_NEAREST, - 'bilinear': cv2.INTER_LINEAR, - 'bicubic': cv2.INTER_CUBIC, -} - -class ResizeTransform(): - def __init__(self, size, interpolation='bilinear'): - if isinstance(size, int): - self.size = size, size - elif isinstance(size, list): - if len(size) == 1: - self.size = size[0], size[0] - elif len(size) == 2: - self.size = size[0], size[1] - - if interpolation in interpolation_map.keys(): - self.interpolation = interpolation_map[interpolation] - else: - raise ValueError("Undefined interpolation type") - - def __call__(self, sample): - image, label = sample - image = cv2.resize(image, self.size, interpolation=self.interpolation) - if len(image.shape) == 2: - image = np.expand_dims(image, -1) - return (image, label) - -class RescaleTransform(): - """Scale the values of image to [0,1]. - Returns: - tuple of processed image and label - """ - - def __call__(self, sample): - """Scale the values of the image in sample.""" - image, label = sample - if isinstance(image, np.ndarray): - image = image.astype('float32') / 255. - return (image, label) - -class NormalizeTransform(): - def __init__(self, mean=[0.0], std=[1.0]): - self.mean = mean - self.std = std - for item in self.std: - if item < 10**-6: - raise ValueError("Std should be greater than 0") - - def __call__(self, sample): - image, label = sample - assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' - image = (image - self.mean) / self.std - return (image, label) - -class TransposeTransform(): - def __init__(self, perm): - self.perm = perm - - def __call__(self, sample): - image, label = sample - assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" - image = np.transpose(image, axes=self.perm) - return (image, label) - -np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, - 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, - 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, - 'float16': np.float16, 'float64': np.float64, 'bool': bool, - 'string': str, 'complex128': np.complex128, 'int16': np.int16} - -class CastTransform(): - def __init__(self, dtype='float32'): - assert dtype in np_dtype_map.keys(), 'Unknown dtype' - self.dtype = dtype - - def __call__(self, sample): - image, label = sample - image = image.astype(np_dtype_map[self.dtype]) - return (image, label) - -class ComposeTransform(): - def __init__(self, transform_list): - self.transform_list = transform_list - - def __call__(self, sample): - for transform in self.transform_list: - sample = transform(sample) - return sample - -class COCOmAPv2(): - """Compute mean average precision of the detection task.""" - - def __init__(self, - anno_path=None, - iou_thrs='0.5:0.05:0.95', - map_points=101, - map_key='DetectionBoxes_Precision/mAP', - output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): - """Initialize the metric. - Args: - anno_path: The path of annotation file. - iou_thrs: Minimal value for intersection over union that allows to make decision - that prediction bounding box is true positive. You can specify one float value - between 0 to 1 or string "05:0.05:0.95" for standard COCO thresholds. - map_points: The way to calculate mAP. 
101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. - map_key: The key that mapping to pycocotools COCOeval. - Defaults to 'DetectionBoxes_Precision/mAP'. - output_index_mapping: The output index mapping. - Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. - """ - self.output_index_mapping = output_index_mapping - from coco_label_map import category_map - if anno_path: - assert os.path.exists(anno_path), 'Annotation path does not exists!' - with open(anno_path, 'r') as f: - label_map = yaml.safe_load(f.read()) - self.category_map_reverse = {k: v for k,v in label_map.items()} - else: - # label: index - self.category_map_reverse = {v: k for k, v in category_map.items()} - self.image_ids = [] - self.ground_truth_list = [] - self.detection_list = [] - self.annotation_id = 1 - self.category_map = category_map - self.category_id_set = set( - [cat for cat in self.category_map]) #index - self.iou_thrs = iou_thrs - self.map_points = map_points - self.map_key = map_key - - def update(self, predicts, labels, sample_weight=None): - """Add the predictions and labels. - Args: - predicts: The predictions. - labels: The labels corresponding to the predictions. - sample_weight: The sample weight. Defaults to None. - """ - from coco_tools import ExportSingleImageGroundtruthToCoco,\ - ExportSingleImageDetectionBoxesToCoco - detections = [] - if 'num_detections' in self.output_index_mapping and \ - self.output_index_mapping['num_detections'] > -1: - for item in zip(*predicts): - detection = {} - num = int(item[self.output_index_mapping['num_detections']]) - detection['boxes'] = np.asarray( - item[self.output_index_mapping['boxes']])[0:num] - detection['scores'] = np.asarray( - item[self.output_index_mapping['scores']])[0:num] - detection['classes'] = np.asarray( - item[self.output_index_mapping['classes']])[0:num] - detections.append(detection) - else: - for item in zip(*predicts): - detection = {} - detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) - detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) - detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) - detections.append(detection) - - bboxes, str_labels,int_labels, image_ids = labels - labels = [] - if len(int_labels[0]) == 0: - for str_label in str_labels: - str_label = [ - x if type(x) == 'str' else x.decode('utf-8') - for x in str_label - ] - labels.append([self.category_map_reverse[x] for x in str_label]) - elif len(str_labels[0]) == 0: - for int_label in int_labels: - labels.append([x for x in int_label]) - - for idx, image_id in enumerate(image_ids): - image_id = image_id if type( - image_id) == 'str' else image_id.decode('utf-8') - if image_id in self.image_ids: - continue - self.image_ids.append(image_id) - - ground_truth = {} - ground_truth['boxes'] = np.asarray(bboxes[idx]) - ground_truth['classes'] = np.asarray(labels[idx]) - - self.ground_truth_list.extend( - ExportSingleImageGroundtruthToCoco( - image_id=image_id, - next_annotation_id=self.annotation_id, - category_id_set=self.category_id_set, - groundtruth_boxes=ground_truth['boxes'], - groundtruth_classes=ground_truth['classes'])) - self.annotation_id += ground_truth['boxes'].shape[0] - - self.detection_list.extend( - ExportSingleImageDetectionBoxesToCoco( - image_id=image_id, - category_id_set=self.category_id_set, - detection_boxes=detections[idx]['boxes'], - detection_scores=detections[idx]['scores'], - detection_classes=detections[idx]['classes'])) 
- - def reset(self): - """Reset the prediction and labels.""" - self.image_ids = [] - self.ground_truth_list = [] - self.detection_list = [] - self.annotation_id = 1 - - def result(self): - """Compute mean average precision. - Returns: - The mean average precision score. - """ - from coco_tools import COCOWrapper, COCOEvalWrapper - if len(self.ground_truth_list) == 0: - return 0 - else: - groundtruth_dict = { - 'annotations': - self.ground_truth_list, - 'images': [{ - 'id': image_id - } for image_id in self.image_ids], - 'categories': [{ - 'id': k, - 'name': v - } for k, v in self.category_map.items()] - } - coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) - coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( - self.detection_list) - box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, - coco_wrapped_detections, - agnostic_mode=False, - iou_thrs = self.iou_thrs, - map_points = self.map_points) - box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( - include_metrics_per_category=False, all_metrics_per_category=False) - box_metrics.update(box_per_category_ap) - box_metrics = { - 'DetectionBoxes_' + key: value - for key, value in iter(box_metrics.items()) - } - - return box_metrics[self.map_key] - - -class Post: - def __call__(self, sample): - preds, labels = sample - preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] - return preds, labels - -class LabelBalanceCOCORawFilter(object): - """The label balance filter for COCO raw data.""" - - def __init__(self, size=1): - """Initialize the attribute of class.""" - self.size = size - - def __call__(self, image, label): - """Execute the filter. - - Args: - image: Not used. - label: label of a sample. - """ +import numpy as np +import collections +from PIL import Image +import os +import yaml +from pycocotools.coco import COCO +import cv2 + +class SequentialSampler(): + def __init__(self, dataset): + self.whole_dataset = dataset + + def __iter__(self): + self.process_rank = 0 # The default rank is 0, which represents the main process + self.process_size = 1 # By default, process_size=1, only the main process is running + return iter(range(self.process_rank, len(self.whole_dataset), self.process_size)) + + def __len__(self): + return len(self.whole_dataset) + +class BatchSampler(): + def __init__(self, sampler, batch_size, drop_last=True): + if isinstance(drop_last, bool): + self.drop_last = drop_last + else: + raise ValueError("last_batch only support bool as input") + + self.sampler = sampler + self.batch_size = batch_size + self.drop_last = drop_last + + def __iter__(self): + batch = [] + for idx in self.sampler: + batch.append(idx) + if len(batch) == self.batch_size: + yield batch + batch = [] + if len(batch) > 0 and not self.drop_last: + yield batch + + def __len__(self): + if self.drop_last: + return len(self.sampler) // self.batch_size + else: + return (len(self.sampler) + self.batch_size - 1) // self.batch_size + +class IndexFetcher(): + def __init__(self, dataset, collate_fn, drop_last): + self.dataset = dataset + self.collate_fn = collate_fn + self.drop_last = drop_last + + def __call__(self, batched_indices): + data = [self.dataset[idx] for idx in batched_indices] + return self.collate_fn(data) + + +def default_collate(batch): + """Merge data with outer dimension batch size.""" + elem = batch[0] + if isinstance(elem, collections.abc.Mapping): + return {key: default_collate([d[key] for d in batch]) for key in elem} + elif isinstance(elem, collections.abc.Sequence): + batch = zip(*batch) + 
return [default_collate(samples) for samples in batch] + elif isinstance(elem, np.ndarray): + try: + return np.stack(batch) + except: + return batch + else: + return batch + +class COCORawDataloader(): + def __init__(self, dataset, batch_size=1, last_batch='rollover', collate_fn=None, + sampler=None, batch_sampler=None, num_workers=0, pin_memory=False, + shuffle=False): + self.dataset = dataset + self.last_batch = last_batch + self.sampler = sampler + self.batch_sampler = batch_sampler + self.num_workers = num_workers + self.pin_memory = pin_memory + self.collate_fn = collate_fn + self.batch_size = batch_size + self.shuffle = shuffle + self.drop_last = False if last_batch == 'rollover' else True + if self.collate_fn == None: + self.collate_fn = default_collate + + def __iter__(self): + """Yield data in iterative order.""" + return self._generate_dataloader( + self.dataset, + batch_size=self.batch_size, + last_batch=self.last_batch, + collate_fn=self.collate_fn, + sampler=self.sampler, + batch_sampler=self.batch_sampler, + num_workers=self.num_workers, + pin_memory=self.pin_memory, + shuffle=self.shuffle) + + def _generate_dataloader(self, dataset, batch_size, last_batch, collate_fn, sampler, + batch_sampler, num_workers, pin_memory, shuffle): + + sampler = self._generate_sampler(dataset) + self.batch_sampler = BatchSampler(sampler, batch_size, self.drop_last) + self.fetcher = IndexFetcher(dataset, collate_fn, self.drop_last) + + for batched_indices in self.batch_sampler: + try: + data = self.fetcher(batched_indices) + yield data + except StopIteration: + return + + def _generate_sampler(self, dataset): + if hasattr(dataset, "__getitem__"): + self.dataset_type = 'index' + return SequentialSampler(dataset) + else: + raise ValueError("dataset type only support (index, iter)") + + +class COCORawDataset(): + """Coco raw dataset. + Please arrange data in this way: + root + ├── 1.jpg + ├── 2.jpg + ... + └── n.jpg + anno_dir + Please use Resize transform when batch_size > 1 + Args: root (str): Root directory of dataset. + anno_dir (str, default='annotations/instances_val2017.json'): annotation file directory. + transform (transform object, default=None): transform to process input data. + filter (Filter objects, default=None): filter out examples according + to specific conditions. 
+ """ + + def __init__(self, root, \ + anno_dir='annotations/instances_val2017.json', transform=None, filter=None): + """Initialize the attributes of class.""" + self.batch_size = 1 + self.image_list = [] + self.transform = transform + img_path = root + anno_path = os.path.join(os.path.dirname(root), anno_dir) + coco = COCO(anno_path) + img_ids = coco.getImgIds() + cat_ids = coco.getCatIds() + for idx, img_id in enumerate(img_ids): + img_info = {} + bboxes = [] + labels = [] + ids = [] + img_detail = coco.loadImgs(img_id)[0] + ids.append(img_detail['file_name'].encode('utf-8')) + pic_height = img_detail['height'] + pic_width = img_detail['width'] + + ann_ids = coco.getAnnIds(imgIds=img_id,catIds=cat_ids) + anns = coco.loadAnns(ann_ids) + for ann in anns: + bbox = ann['bbox'] + if len(bbox) == 0: + continue + bbox = [bbox[0]/float(pic_width), bbox[1]/float(pic_height),\ + bbox[2]/float(pic_width), bbox[3]/float(pic_height)] + bboxes.append([bbox[1], bbox[0], bbox[1]+bbox[3], bbox[0]+bbox[2]]) + labels.append(coco.cats[ann['category_id']]['name'].encode('utf8')) + img_file = os.path.join(img_path, img_detail['file_name']) + if not os.path.exists(img_file) or len(bboxes) == 0: + continue + + if filter and not filter(None, bboxes): + continue + + with Image.open(img_file) as image: + image = np.array(image.convert('RGB')) + self.image_list.append( + (image, [np.array(bboxes), np.array(labels), np.array([]),\ + np.array(img_detail['file_name'].encode('utf-8'))])) + + def __len__(self): + """Length of the dataset.""" + return len(self.image_list) + + def __getitem__(self, index): + """Magic method. + x[i] is roughly equivalent to type(x).__getitem__(x, index) + """ + sample = self.image_list[index] + if self.transform is not None: + sample= self.transform(sample) + return sample + +interpolation_map = { + 'nearest': cv2.INTER_NEAREST, + 'bilinear': cv2.INTER_LINEAR, + 'bicubic': cv2.INTER_CUBIC, +} + +class ResizeTransform(): + def __init__(self, size, interpolation='bilinear'): + if isinstance(size, int): + self.size = size, size + elif isinstance(size, list): + if len(size) == 1: + self.size = size[0], size[0] + elif len(size) == 2: + self.size = size[0], size[1] + + if interpolation in interpolation_map.keys(): + self.interpolation = interpolation_map[interpolation] + else: + raise ValueError("Undefined interpolation type") + + def __call__(self, sample): + image, label = sample + image = cv2.resize(image, self.size, interpolation=self.interpolation) + if len(image.shape) == 2: + image = np.expand_dims(image, -1) + return (image, label) + +class RescaleTransform(): + """Scale the values of image to [0,1]. + Returns: + tuple of processed image and label + """ + + def __call__(self, sample): + """Scale the values of the image in sample.""" + image, label = sample + if isinstance(image, np.ndarray): + image = image.astype('float32') / 255. 
+ return (image, label) + +class NormalizeTransform(): + def __init__(self, mean=[0.0], std=[1.0]): + self.mean = mean + self.std = std + for item in self.std: + if item < 10**-6: + raise ValueError("Std should be greater than 0") + + def __call__(self, sample): + image, label = sample + assert len(self.mean) == image.shape[-1], 'Mean channel must match image channel' + image = (image - self.mean) / self.std + return (image, label) + +class TransposeTransform(): + def __init__(self, perm): + self.perm = perm + + def __call__(self, sample): + image, label = sample + assert len(image.shape) == len(self.perm), "Image rank doesn't match Perm rank" + image = np.transpose(image, axes=self.perm) + return (image, label) + +np_dtype_map = {'int8': np.int8, 'uint8': np.uint8, 'complex64': np.complex64, + 'uint16': np.uint16, 'int32': np.int32, 'uint32': np.uint32, + 'int64': np.int64, 'uint64': np.uint64, 'float32': np.float32, + 'float16': np.float16, 'float64': np.float64, 'bool': bool, + 'string': str, 'complex128': np.complex128, 'int16': np.int16} + +class CastTransform(): + def __init__(self, dtype='float32'): + assert dtype in np_dtype_map.keys(), 'Unknown dtype' + self.dtype = dtype + + def __call__(self, sample): + image, label = sample + image = image.astype(np_dtype_map[self.dtype]) + return (image, label) + +class ComposeTransform(): + def __init__(self, transform_list): + self.transform_list = transform_list + + def __call__(self, sample): + for transform in self.transform_list: + sample = transform(sample) + return sample + +class COCOmAPv2(): + """Compute mean average precision of the detection task.""" + + def __init__(self, + anno_path=None, + iou_thrs='0.5:0.05:0.95', + map_points=101, + map_key='DetectionBoxes_Precision/mAP', + output_index_mapping={'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}): + """Initialize the metric. + Args: + anno_path: The path of annotation file. + iou_thrs: Minimal intersection-over-union value required to count a prediction + bounding box as a true positive. You can specify one float value + between 0 and 1 or the string "0.5:0.05:0.95" for the standard COCO thresholds. + map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for + 11-point interpolated AP, 0 for area under PR curve. + map_key: The key that maps to pycocotools COCOeval. + Defaults to 'DetectionBoxes_Precision/mAP'. + output_index_mapping: The output index mapping. + Defaults to {'num_detections':-1, 'boxes':0, 'scores':1, 'classes':2}. + """ + self.output_index_mapping = output_index_mapping + from coco_label_map import category_map + if anno_path: + assert os.path.exists(anno_path), 'Annotation path does not exist!' + with open(anno_path, 'r') as f: + label_map = yaml.safe_load(f.read()) + self.category_map_reverse = {k: v for k,v in label_map.items()} + else: + # label: index + self.category_map_reverse = {v: k for k, v in category_map.items()} + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + self.category_map = category_map + self.category_id_set = set( + [cat for cat in self.category_map]) #index + self.iou_thrs = iou_thrs + self.map_points = map_points + self.map_key = map_key + + def update(self, predicts, labels, sample_weight=None): + """Add the predictions and labels. + Args: + predicts: The predictions. + labels: The labels corresponding to the predictions. + sample_weight: The sample weight. Defaults to None. 
+ """ + from coco_tools import ExportSingleImageGroundtruthToCoco,\ + ExportSingleImageDetectionBoxesToCoco + detections = [] + if 'num_detections' in self.output_index_mapping and \ + self.output_index_mapping['num_detections'] > -1: + for item in zip(*predicts): + detection = {} + num = int(item[self.output_index_mapping['num_detections']]) + detection['boxes'] = np.asarray( + item[self.output_index_mapping['boxes']])[0:num] + detection['scores'] = np.asarray( + item[self.output_index_mapping['scores']])[0:num] + detection['classes'] = np.asarray( + item[self.output_index_mapping['classes']])[0:num] + detections.append(detection) + else: + for item in zip(*predicts): + detection = {} + detection['boxes'] = np.asarray(item[self.output_index_mapping['boxes']]) + detection['scores'] = np.asarray(item[self.output_index_mapping['scores']]) + detection['classes'] = np.asarray(item[self.output_index_mapping['classes']]) + detections.append(detection) + + bboxes, str_labels,int_labels, image_ids = labels + labels = [] + if len(int_labels[0]) == 0: + for str_label in str_labels: + str_label = [ + x if type(x) == 'str' else x.decode('utf-8') + for x in str_label + ] + labels.append([self.category_map_reverse[x] for x in str_label]) + elif len(str_labels[0]) == 0: + for int_label in int_labels: + labels.append([x for x in int_label]) + + for idx, image_id in enumerate(image_ids): + image_id = image_id if type( + image_id) == 'str' else image_id.decode('utf-8') + if image_id in self.image_ids: + continue + self.image_ids.append(image_id) + + ground_truth = {} + ground_truth['boxes'] = np.asarray(bboxes[idx]) + ground_truth['classes'] = np.asarray(labels[idx]) + + self.ground_truth_list.extend( + ExportSingleImageGroundtruthToCoco( + image_id=image_id, + next_annotation_id=self.annotation_id, + category_id_set=self.category_id_set, + groundtruth_boxes=ground_truth['boxes'], + groundtruth_classes=ground_truth['classes'])) + self.annotation_id += ground_truth['boxes'].shape[0] + + self.detection_list.extend( + ExportSingleImageDetectionBoxesToCoco( + image_id=image_id, + category_id_set=self.category_id_set, + detection_boxes=detections[idx]['boxes'], + detection_scores=detections[idx]['scores'], + detection_classes=detections[idx]['classes'])) + + def reset(self): + """Reset the prediction and labels.""" + self.image_ids = [] + self.ground_truth_list = [] + self.detection_list = [] + self.annotation_id = 1 + + def result(self): + """Compute mean average precision. + Returns: + The mean average precision score. 
+ """ + from coco_tools import COCOWrapper, COCOEvalWrapper + if len(self.ground_truth_list) == 0: + return 0 + else: + groundtruth_dict = { + 'annotations': + self.ground_truth_list, + 'images': [{ + 'id': image_id + } for image_id in self.image_ids], + 'categories': [{ + 'id': k, + 'name': v + } for k, v in self.category_map.items()] + } + coco_wrapped_groundtruth = COCOWrapper(groundtruth_dict) + coco_wrapped_detections = coco_wrapped_groundtruth.LoadAnnotations( + self.detection_list) + box_evaluator = COCOEvalWrapper(coco_wrapped_groundtruth, + coco_wrapped_detections, + agnostic_mode=False, + iou_thrs = self.iou_thrs, + map_points = self.map_points) + box_metrics, box_per_category_ap = box_evaluator.ComputeMetrics( + include_metrics_per_category=False, all_metrics_per_category=False) + box_metrics.update(box_per_category_ap) + box_metrics = { + 'DetectionBoxes_' + key: value + for key, value in iter(box_metrics.items()) + } + + return box_metrics[self.map_key] + +class Post: + def __call__(self, sample): + preds, labels = sample + preds[0][0][:, [0, 1, 2, 3]] = preds[0][0][:, [1, 0, 3, 2]] + return preds, labels + +class LabelBalanceCOCORawFilter(object): + """The label balance filter for COCO raw data.""" + + def __init__(self, size=1): + """Initialize the attribute of class.""" + self.size = size + + def __call__(self, image, label): + """Execute the filter. + + Args: + image: Not used. + label: label of a sample. + """ return len(label) == self.size \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/main.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/main.py similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/main.py rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/main.py diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/prepare_model.py similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/prepare_model.py diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_quant.sh similarity index 100% rename from 
examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/run_quant.sh diff --git a/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/README.md b/examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/README.md similarity index 100% rename from examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/README.md rename to examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/README.md diff --git a/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/patch b/examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/patch similarity index 100% rename from examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/patch rename to examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/patch diff --git a/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare.sh b/examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare.sh similarity index 100% rename from examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare.sh rename to examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare.sh diff --git a/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare_model.py b/examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare_model.py similarity index 97% rename from examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare_model.py rename to examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare_model.py index 2ec0a8cebb7..73301702b3a 100644 --- a/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare_model.py +++ b/examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/prepare_model.py @@ -1,100 +1,100 @@ -import argparse -import os -import subprocess -import sys -from urllib import request - -MODEL_URLS = {"structure_detr": "https://huggingface.co/bsmock/tatr-pubtables1m-v1.0/resolve/main/pubtables1m_structure_detr_r18.pth", - "detection_detr": "https://huggingface.co/bsmock/tatr-pubtables1m-v1.0/resolve/main/pubtables1m_detection_detr_r18.pth"} -MAX_TIMES_RETRY_DOWNLOAD = 5 - - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument("--input_model", - type=str, - required=False, - choices=["structure_detr", "detection_detr"], - default="structure_detr") - parser.add_argument("--output_model", type=str, required=True) - parser.add_argument("--dataset_location", type=str, required=True) - return parser.parse_args() - - -def progressbar(cur, total=100): - percent = '{:.2%}'.format(cur / total) - sys.stdout.write("\r[%-100s] %s" % ('#' * int(cur), percent)) - sys.stdout.flush() - - -def schedule(blocknum, blocksize, totalsize): - if totalsize == 0: - percent = 0 - else: - percent = min(1.0, blocknum * blocksize / totalsize) * 100 - progressbar(percent) - - -def download_model(url, retry_times=5): - model_name = url.split("/")[-1] - if os.path.isfile(model_name): - print(f"{model_name} exists, skip download") - return True - - print("download model...") - retries = 0 - while retries < retry_times: - try: - request.urlretrieve(url, model_name, 
schedule) - break - except KeyboardInterrupt: - return False - except: - retries += 1 - print(f"Download failed{', Retry downloading...' if retries < retry_times else '!'}") - return retries < retry_times - - -def export_model(input_model, output_model, dataset_location): - print("\nexport model...") - - if not os.path.exists("./table-transformer"): - subprocess.run("bash prepare.sh", shell=True) - - model_load_path = os.path.abspath(MODEL_URLS[input_model].split("/")[-1]) - output_model = os.path.join(os.path.dirname(model_load_path), output_model) - if input_model == "detection_detr": - data_root_dir = os.path.join(dataset_location, "PubTables-1M-Detection") - data_type = "detection" - config_file = "detection_config.json" - elif input_model == "structure_detr": - data_root_dir = os.path.join(dataset_location, "PubTables-1M-Structure") - data_type = "structure" - config_file = "structure_config.json" - table_words_dir = os.path.join(data_root_dir, "words") - - os.chdir("table-transformer/src") - - command = f"python main.py \ - --model_load_path {model_load_path} \ - --output_model {output_model} \ - --data_root_dir {data_root_dir} \ - --table_words_dir {table_words_dir} \ - --mode export \ - --data_type {data_type} \ - --device cpu \ - --config_file {config_file}" - - subprocess.run(command, shell=True) - assert os.path.exists(output_model), f"Export failed! {output_model} doesn't exist!" - - -def prepare_model(input_model, output_model, dataset_location): - is_download_successful = download_model(MODEL_URLS[args.input_model], MAX_TIMES_RETRY_DOWNLOAD) - if is_download_successful: - export_model(input_model, output_model, dataset_location) - - -if __name__ == "__main__": - args = parse_arguments() +import argparse +import os +import subprocess +import sys +from urllib import request + +MODEL_URLS = {"structure_detr": "https://huggingface.co/bsmock/tatr-pubtables1m-v1.0/resolve/main/pubtables1m_structure_detr_r18.pth", + "detection_detr": "https://huggingface.co/bsmock/tatr-pubtables1m-v1.0/resolve/main/pubtables1m_detection_detr_r18.pth"} +MAX_TIMES_RETRY_DOWNLOAD = 5 + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument("--input_model", + type=str, + required=False, + choices=["structure_detr", "detection_detr"], + default="structure_detr") + parser.add_argument("--output_model", type=str, required=True) + parser.add_argument("--dataset_location", type=str, required=True) + return parser.parse_args() + + +def progressbar(cur, total=100): + percent = '{:.2%}'.format(cur / total) + sys.stdout.write("\r[%-100s] %s" % ('#' * int(cur), percent)) + sys.stdout.flush() + + +def schedule(blocknum, blocksize, totalsize): + if totalsize == 0: + percent = 0 + else: + percent = min(1.0, blocknum * blocksize / totalsize) * 100 + progressbar(percent) + + +def download_model(url, retry_times=5): + model_name = url.split("/")[-1] + if os.path.isfile(model_name): + print(f"{model_name} exists, skip download") + return True + + print("download model...") + retries = 0 + while retries < retry_times: + try: + request.urlretrieve(url, model_name, schedule) + break + except KeyboardInterrupt: + return False + except: + retries += 1 + print(f"Download failed{', Retry downloading...' 
if retries < retry_times else '!'}") + return retries < retry_times + + +def export_model(input_model, output_model, dataset_location): + print("\nexport model...") + + if not os.path.exists("./table-transformer"): + subprocess.run("bash prepare.sh", shell=True) + + model_load_path = os.path.abspath(MODEL_URLS[input_model].split("/")[-1]) + output_model = os.path.join(os.path.dirname(model_load_path), output_model) + if input_model == "detection_detr": + data_root_dir = os.path.join(dataset_location, "PubTables-1M-Detection") + data_type = "detection" + config_file = "detection_config.json" + elif input_model == "structure_detr": + data_root_dir = os.path.join(dataset_location, "PubTables-1M-Structure") + data_type = "structure" + config_file = "structure_config.json" + table_words_dir = os.path.join(data_root_dir, "words") + + os.chdir("table-transformer/src") + + command = f"python main.py \ + --model_load_path {model_load_path} \ + --output_model {output_model} \ + --data_root_dir {data_root_dir} \ + --table_words_dir {table_words_dir} \ + --mode export \ + --data_type {data_type} \ + --device cpu \ + --config_file {config_file}" + + subprocess.run(command, shell=True) + assert os.path.exists(output_model), f"Export failed! {output_model} doesn't exist!" + + +def prepare_model(input_model, output_model, dataset_location): + is_download_successful = download_model(MODEL_URLS[args.input_model], MAX_TIMES_RETRY_DOWNLOAD) + if is_download_successful: + export_model(input_model, output_model, dataset_location) + + +if __name__ == "__main__": + args = parse_arguments() prepare_model(args.input_model, args.output_model, args.dataset_location) \ No newline at end of file diff --git a/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/requirements.txt b/examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/requirements.txt similarity index 100% rename from examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/requirements.txt rename to examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/requirements.txt diff --git a/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/run_benchmark.sh b/examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/run_benchmark.sh similarity index 100% rename from examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/run_benchmark.sh rename to examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/run_benchmark.sh diff --git a/examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/run_quant.sh b/examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/run_quant.sh similarity index 100% rename from examples/onnxrt/object_detection/table_transformer/quantization/ptq_static/run_quant.sh rename to examples/deprecated/onnxrt/object_detection/table_transformer/quantization/ptq_static/run_quant.sh diff --git a/examples/pytorch/diffusion_model/diffusers/dreambooth/README.md b/examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/README.md similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/dreambooth/README.md rename to examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/README.md diff --git a/examples/pytorch/diffusion_model/diffusers/dreambooth/generated_images/dog-bucket-ipex.png 
b/examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/generated_images/dog-bucket-ipex.png similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/dreambooth/generated_images/dog-bucket-ipex.png rename to examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/generated_images/dog-bucket-ipex.png diff --git a/examples/pytorch/diffusion_model/diffusers/dreambooth/generated_images/dog-bucket.png b/examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/generated_images/dog-bucket.png similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/dreambooth/generated_images/dog-bucket.png rename to examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/generated_images/dog-bucket.png diff --git a/examples/pytorch/diffusion_model/diffusers/dreambooth/requirements.txt b/examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/requirements.txt similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/dreambooth/requirements.txt rename to examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/requirements.txt diff --git a/examples/pytorch/diffusion_model/diffusers/dreambooth/train_dreambooth.py b/examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/train_dreambooth.py similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/dreambooth/train_dreambooth.py rename to examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/train_dreambooth.py diff --git a/examples/pytorch/diffusion_model/diffusers/dreambooth/train_dreambooth_ipex.py b/examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/train_dreambooth_ipex.py similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/dreambooth/train_dreambooth_ipex.py rename to examples/deprecated/pytorch/diffusion_model/diffusers/dreambooth/train_dreambooth_ipex.py diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/README.md b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/README.md similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/README.md rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/README.md diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/dicoo/1.jpeg b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/dicoo/1.jpeg similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/dicoo/1.jpeg rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/dicoo/1.jpeg diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/README.md b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/README.md similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/README.md rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/README.md diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/images/FP32.png b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/images/FP32.png similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/images/FP32.png rename to 
examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/images/FP32.png diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/images/INT8.png b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/images/INT8.png similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/images/INT8.png rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/images/INT8.png diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/requirements.txt b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/requirements.txt similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/requirements.txt rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/requirements.txt diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/text2images.py b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/text2images.py similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/text2images.py rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/text2images.py diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/textual_inversion.py b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/textual_inversion.py similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/textual_inversion.py rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/distillation_for_quantization/textual_inversion.py diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/generated_images/dicoo_christmas.png b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/generated_images/dicoo_christmas.png similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/generated_images/dicoo_christmas.png rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/generated_images/dicoo_christmas.png diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/requirements.txt b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/requirements.txt similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/requirements.txt rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/requirements.txt diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/textual_inversion.py b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/textual_inversion.py similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/textual_inversion.py rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/textual_inversion.py diff --git a/examples/pytorch/diffusion_model/diffusers/textual_inversion/textual_inversion_ipex.py 
b/examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/textual_inversion_ipex.py similarity index 100% rename from examples/pytorch/diffusion_model/diffusers/textual_inversion/textual_inversion_ipex.py rename to examples/deprecated/pytorch/diffusion_model/diffusers/textual_inversion/textual_inversion_ipex.py diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/__init__.py b/examples/deprecated/pytorch/image_recognition/3d-unet/__init__.py similarity index 100% rename from examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/__init__.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/__init__.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/.dockerignore b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/.dockerignore similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/.dockerignore rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/.dockerignore diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/.gitignore b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/.gitignore similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/.gitignore rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/.gitignore diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/Makefile b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/Makefile similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/Makefile rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/Makefile diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/README.md b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/README.md similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/README.md rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/README.md diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/Task043_BraTS_2019.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/Task043_BraTS_2019.py similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/Task043_BraTS_2019.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/Task043_BraTS_2019.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/accuracy-brats.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/accuracy-brats.py similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/accuracy-brats.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/accuracy-brats.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/brats_QSL.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/brats_QSL.py similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/brats_QSL.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/brats_QSL.py diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt 
b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/brats_cal_images_list.txt similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/brats_cal_images_list.txt diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold0_validation.txt similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold0_validation.txt diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold1_validation.txt similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold1_validation.txt diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold2_validation.txt similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold2_validation.txt diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold3_validation.txt similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold3_validation.txt diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold4_validation.txt similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold4_validation.txt diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/mlperf.conf b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/mlperf.conf similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/mlperf.conf rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/mlperf.conf diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/onnxruntime_SUT.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/onnxruntime_SUT.py similarity index 100% rename from 
examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/onnxruntime_SUT.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/onnxruntime_SUT.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/ov_SUT.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/ov_SUT.py similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/ov_SUT.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/ov_SUT.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/preprocess.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/preprocess.py similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/preprocess.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/preprocess.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/pytorch_SUT.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/pytorch_SUT.py similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/pytorch_SUT.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/pytorch_SUT.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/requirements.txt b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/requirements.txt rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/requirements.txt diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run.py similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run_benchmark.sh diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run_quant.sh b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run_quant.sh similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run_quant.sh rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/run_quant.sh diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/tf_SUT.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/tf_SUT.py similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/tf_SUT.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/tf_SUT.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/unet_onnx_to_tf.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/unet_onnx_to_tf.py similarity index 100% rename from 
examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/unet_onnx_to_tf.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/unet_onnx_to_tf.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/unet_pytorch_to_onnx.py b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/unet_pytorch_to_onnx.py similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/unet_pytorch_to_onnx.py rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/unet_pytorch_to_onnx.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/user.conf b/examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/user.conf similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/user.conf rename to examples/deprecated/pytorch/image_recognition/3d-unet/quantization/ptq/fx/user.conf diff --git a/examples/pytorch/image_recognition/CNN-2/distillation/eager/README.md b/examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/README.md similarity index 100% rename from examples/pytorch/image_recognition/CNN-2/distillation/eager/README.md rename to examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/README.md diff --git a/examples/pytorch/image_recognition/CNN-2/distillation/eager/main.py b/examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/main.py similarity index 100% rename from examples/pytorch/image_recognition/CNN-2/distillation/eager/main.py rename to examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/main.py diff --git a/examples/pytorch/image_recognition/CNN-2/distillation/eager/plain_cnn_cifar.py b/examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/plain_cnn_cifar.py similarity index 100% rename from examples/pytorch/image_recognition/CNN-2/distillation/eager/plain_cnn_cifar.py rename to examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/plain_cnn_cifar.py diff --git a/examples/pytorch/image_recognition/CNN-2/distillation/eager/requirements.txt b/examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/CNN-2/distillation/eager/requirements.txt rename to examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/requirements.txt diff --git a/examples/pytorch/image_recognition/CNN-2/distillation/eager/train_without_distillation.py b/examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/train_without_distillation.py similarity index 100% rename from examples/pytorch/image_recognition/CNN-2/distillation/eager/train_without_distillation.py rename to examples/deprecated/pytorch/image_recognition/CNN-2/distillation/eager/train_without_distillation.py diff --git a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/README.md b/examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/README.md similarity index 100% rename from examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/README.md rename to examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/README.md diff --git a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/main.py b/examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/main.py similarity index 100% rename from 
examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/main.py rename to examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/main.py diff --git a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/requirements.txt b/examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/requirements.txt rename to examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/requirements.txt diff --git a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/train_without_distillation.py b/examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/train_without_distillation.py similarity index 100% rename from examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/train_without_distillation.py rename to examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/train_without_distillation.py diff --git a/examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/wideresnet.py b/examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/wideresnet.py similarity index 100% rename from examples/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/wideresnet.py rename to examples/deprecated/pytorch/image_recognition/MobileNetV2-0.35/distillation/eager/wideresnet.py diff --git a/examples/pytorch/image_recognition/ResNet50/pruning/eager/README.md b/examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/README.md similarity index 97% rename from examples/pytorch/image_recognition/ResNet50/pruning/eager/README.md rename to examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/README.md index f63c9132279..86303624206 100644 --- a/examples/pytorch/image_recognition/ResNet50/pruning/eager/README.md +++ b/examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/README.md @@ -1,106 +1,106 @@ -# Step by Step -This document describes the step-by-step instructions for pruning ResNet50 on ImageNet dataset. The example refers **pytorch-image-model[](https://github.com/huggingface/pytorch-image-models)**, a popular package for PyTorch image models. - -# Prerequisite -## Environment -First, please make sure that you have successfully installed neural_compressor. -```bash -# install dependencies -cd examples/pytorch/image_recognition/ResNet50/pruning/eager/ -pip install -r requirements.txt -``` -## Prepare Dataset -Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/imagenet. The dir include below folder: -```bash -ls /path/to/imagenet -train val -``` - -# Pruning -Go to the script run_resnet50_prune.sh. 
Please get familiar with some parameters of pruning by referring to our [Pruning API README](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner) -```bash -#!/bin/bash -DATA="/path/to/your/dataset/" -python ./train.py \ - ${DATA} \ - --model "resnet50" \ - --num-classes 1000 \ - --pretrained \ - --batch-size 128 \ - --lr 0.175 \ - --epochs 180 \ - --warmup-epochs 0 \ - --cooldown-epochs 20 \ - --do-prune \ - --do-distillation \ - --target-sparsity 0.75 \ - --pruning-pattern "2x1" \ - --update-frequency-on-step 2000 \ - --distillation-loss-weight "1.0" \ - --output ./path/save/your/models/ \ -``` -After configs are settled, just run: -```bash -sh run_resnet50_prune.sh -``` - -If you do not have a GPU, our code will automatically deploy pruning process on CPU. If you do have GPUs and CUDA but you still want to execute the pruning on CPU, use an extra argument of "--no-cuda". -```bash -#!/bin/bash -DATA="/path/to/your/dataset/" -python ./train.py \ - ${DATA} \ - --model "resnet50" \ - --num-classes 1000 \ - --pretrained \ - --batch-size 128 \ - --lr 0.175 \ - --epochs 180 \ - --warmup-epochs 0 \ - --cooldown-epochs 20 \ - --do-prune \ - --do-distillation \ - --target-sparsity 0.75 \ - --pruning-pattern "2x1" \ - --update-frequency-on-step 2000 \ - --distillation-loss-weight "1.0" \ - --output ./path/save/your/models/ \ - --no-cuda -``` - -# Results -Our dense ResNet50 model's accuracy is 80.1, and our pruned model with 75% 2x1 structured sparsity has accuracy of 78.95. -Your can refer to our validated pruning results in our [documentation](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner#validated-pruning-models) - -# Distributed Data Parallel Training -We also supported Distributed Data Parallel training on single node and multi nodes settings for pruning. To use Distributed Data Parallel to speedup training, the bash command needs a small adjustment. -
-For example, bash command will look like the following, where *``* is the address of the master node, it won't be necessary for single node case, *``* is the desired processes to use in current node, for node with GPU, usually set to number of GPUs in this node, for node without GPU and use CPU for training, it's recommended set to 1, *``* is the number of nodes to use, *``* is the rank of the current node, rank starts from 0 to *``*`-1`. -
-Also please note that to use CPU for training in each node with multi nodes settings, argument `--no_cuda` is mandatory. In multi nodes setting, following command needs to be launched in each node, and all the commands should be the same except for *``*, which should be integer from 0 to *``*`-1` assigned to each node. - -```bash -DATA="/path/to/your/dataset/" -python -m torch.distributed.launch --master_addr= --nproc_per_node= --nnodes= --node_rank= \ - ./train.py \ - ${DATA} \ - --model "resnet50" \ - --num-classes 1000 \ - --pretrained \ - --batch-size 128 \ - --lr 0.175 \ - --epochs 180 \ - --warmup-epochs 0 \ - --cooldown-epochs 20 \ - --do-prune \ - --do-distillation \ - --target-sparsity 0.75 \ - --pruning-pattern "2x1" \ - --update-frequency-on-step 2000 \ - --distillation-loss-weight "1.0" \ - --output ./path/save/your/models/ \ - --no-cuda - --dist_backend gloo - --distributed -``` - +# Step by Step +This document describes the step-by-step instructions for pruning ResNet50 on ImageNet dataset. The example refers **pytorch-image-model[](https://github.com/huggingface/pytorch-image-models)**, a popular package for PyTorch image models. + +# Prerequisite +## Environment +First, please make sure that you have successfully installed neural_compressor. +```bash +# install dependencies +cd examples/pytorch/image_recognition/ResNet50/pruning/eager/ +pip install -r requirements.txt +``` +## Prepare Dataset +Download [ImageNet](http://www.image-net.org/) Raw image to dir: /path/to/imagenet. The dir include below folder: +```bash +ls /path/to/imagenet +train val +``` + +# Pruning +Go to the script run_resnet50_prune.sh. Please get familiar with some parameters of pruning by referring to our [Pruning API README](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner) +```bash +#!/bin/bash +DATA="/path/to/your/dataset/" +python ./train.py \ + ${DATA} \ + --model "resnet50" \ + --num-classes 1000 \ + --pretrained \ + --batch-size 128 \ + --lr 0.175 \ + --epochs 180 \ + --warmup-epochs 0 \ + --cooldown-epochs 20 \ + --do-prune \ + --do-distillation \ + --target-sparsity 0.75 \ + --pruning-pattern "2x1" \ + --update-frequency-on-step 2000 \ + --distillation-loss-weight "1.0" \ + --output ./path/save/your/models/ \ +``` +After configs are settled, just run: +```bash +sh run_resnet50_prune.sh +``` + +If you do not have a GPU, our code will automatically deploy pruning process on CPU. If you do have GPUs and CUDA but you still want to execute the pruning on CPU, use an extra argument of "--no-cuda". +```bash +#!/bin/bash +DATA="/path/to/your/dataset/" +python ./train.py \ + ${DATA} \ + --model "resnet50" \ + --num-classes 1000 \ + --pretrained \ + --batch-size 128 \ + --lr 0.175 \ + --epochs 180 \ + --warmup-epochs 0 \ + --cooldown-epochs 20 \ + --do-prune \ + --do-distillation \ + --target-sparsity 0.75 \ + --pruning-pattern "2x1" \ + --update-frequency-on-step 2000 \ + --distillation-loss-weight "1.0" \ + --output ./path/save/your/models/ \ + --no-cuda +``` + +# Results +Our dense ResNet50 model's accuracy is 80.1, and our pruned model with 75% 2x1 structured sparsity has accuracy of 78.95. +Your can refer to our validated pruning results in our [documentation](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner#validated-pruning-models) + +# Distributed Data Parallel Training +We also supported Distributed Data Parallel training on single node and multi nodes settings for pruning. 
To use Distributed Data Parallel to speed up training, the bash command needs a small adjustment.
+
+For example, the bash command will look like the following, where *`<MASTER_ADDR>`* is the address of the master node (not needed in the single-node case), *`<NPROC_PER_NODE>`* is the number of processes to launch on the current node (for a node with GPUs, usually the number of GPUs in that node; for a node without GPUs that trains on CPU, 1 is recommended), *`<NNODES>`* is the number of nodes to use, and *`<NODE_RANK>`* is the rank of the current node, starting from 0 up to *`<NNODES>`*`-1`.
+
+Also please note that to use CPU for training on each node in the multi-node setting, the argument `--no_cuda` is mandatory. In the multi-node setting, the following command needs to be launched on each node, and all the commands should be the same except for *`<NODE_RANK>`*, which should be an integer from 0 to *`<NNODES>`*`-1` assigned to each node.
+
+```bash
+DATA="/path/to/your/dataset/"
+python -m torch.distributed.launch --master_addr=<MASTER_ADDR> --nproc_per_node=<NPROC_PER_NODE> --nnodes=<NNODES> --node_rank=<NODE_RANK> \
+    ./train.py \
+    ${DATA} \
+    --model "resnet50" \
+    --num-classes 1000 \
+    --pretrained \
+    --batch-size 128 \
+    --lr 0.175 \
+    --epochs 180 \
+    --warmup-epochs 0 \
+    --cooldown-epochs 20 \
+    --do-prune \
+    --do-distillation \
+    --target-sparsity 0.75 \
+    --pruning-pattern "2x1" \
+    --update-frequency-on-step 2000 \
+    --distillation-loss-weight "1.0" \
+    --output ./path/save/your/models/ \
+    --no-cuda \
+    --dist_backend gloo \
+    --distributed
+```
+
diff --git a/examples/pytorch/image_recognition/ResNet50/pruning/eager/requirements.txt b/examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/requirements.txt
similarity index 92%
rename from examples/pytorch/image_recognition/ResNet50/pruning/eager/requirements.txt
rename to examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/requirements.txt
index 9211e72a075..932a46bb74a 100644
--- a/examples/pytorch/image_recognition/ResNet50/pruning/eager/requirements.txt
+++ b/examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/requirements.txt
@@ -1,4 +1,4 @@
-
-torch
-torchvision
+
+torch
+torchvision
 timm==0.8.13.dev0
\ No newline at end of file
diff --git a/examples/pytorch/image_recognition/ResNet50/pruning/eager/run_resnet50_prune.sh b/examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/run_resnet50_prune.sh
similarity index 100%
rename from examples/pytorch/image_recognition/ResNet50/pruning/eager/run_resnet50_prune.sh
rename to examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/run_resnet50_prune.sh
diff --git a/examples/pytorch/image_recognition/ResNet50/pruning/eager/train.py b/examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/train.py
similarity index 97%
rename from examples/pytorch/image_recognition/ResNet50/pruning/eager/train.py
rename to examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/train.py
index 443adbb7d29..2184e648c8c 100644
--- a/examples/pytorch/image_recognition/ResNet50/pruning/eager/train.py
+++ b/examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/train.py
@@ -1,1151 +1,1151 @@
-#!/usr/bin/env python3
-""" ImageNet Training Script
-This is intended to be a lean and easily modifiable ImageNet training script that reproduces ImageNet
-training results with some of the latest networks and training techniques. It favours canonical PyTorch
-and standard Python style over trying to be able to 'do it all.' That said, it offers quite a few speed
-and training result improvements over the usual PyTorch example scripts. Repurpose as you see fit.
-This script was started from an early version of the PyTorch ImageNet example -(https://github.com/pytorch/examples/tree/master/imagenet) -NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples -(https://github.com/NVIDIA/apex/tree/master/examples/imagenet) -Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) -""" -import argparse -import logging -import os -import time -from collections import OrderedDict -from contextlib import suppress -from datetime import datetime -from functools import partial -import sys -sys.path.append("./") - -import torch -import torch.nn as nn -from torch.nn import functional as F -import torchvision.utils -import yaml -from torch.nn.parallel import DistributedDataParallel as NativeDDP - -from timm import utils -from timm.data import create_dataset, create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset -from timm.layers import convert_splitbn_model, convert_sync_batchnorm, set_fast_norm -from timm.loss import JsdCrossEntropy, SoftTargetCrossEntropy, BinaryCrossEntropy, LabelSmoothingCrossEntropy -from timm.models import create_model, safe_model_name, resume_checkpoint, load_checkpoint, model_parameters -from timm.optim import create_optimizer_v2, optimizer_kwargs -from timm.scheduler import create_scheduler_v2, scheduler_kwargs -from timm.utils import ApexScaler, NativeScaler - -try: - from apex import amp - from apex.parallel import DistributedDataParallel as ApexDDP - from apex.parallel import convert_syncbn_model - has_apex = True -except ImportError: - has_apex = False - -has_native_amp = False -try: - if getattr(torch.cuda.amp, 'autocast') is not None: - has_native_amp = True -except AttributeError: - pass - -try: - import wandb - has_wandb = True -except ImportError: - has_wandb = False - -try: - from functorch.compile import memory_efficient_fusion - has_functorch = True -except ImportError as e: - has_functorch = False - -has_compile = hasattr(torch, 'compile') - -def get_loss_one_logit(student_logit, teacher_logit): - t = 2.0 # distillation temperature - return F.kl_div( - input=F.log_softmax(student_logit / t, dim=-1), - target=F.softmax(teacher_logit / t, dim=-1), - reduction="batchmean" - ) * (t ** 2) - -_logger = logging.getLogger('train') - -# The first arg parser parses out only the --config argument, this argument is used to -# load a yaml file containing key-values that override the defaults for the main parser below -config_parser = parser = argparse.ArgumentParser(description='Training Config', add_help=False) -parser.add_argument('-c', '--config', default='', type=str, metavar='FILE', - help='YAML config file specifying default arguments') - - -parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') - -# Dataset parameters -group = parser.add_argument_group('Dataset parameters') -# Keep this argument outside the dataset group because it is positional. 
-parser.add_argument('data', nargs='?', metavar='DIR', const=None, - help='path to dataset (positional is *deprecated*, use --data-dir)') -parser.add_argument('--data-dir', metavar='DIR', - help='path to dataset (root dir)') -parser.add_argument('--dataset', metavar='NAME', default='', - help='dataset type + name ("/") (default: ImageFolder or ImageTar if empty)') -group.add_argument('--train-split', metavar='NAME', default='train', - help='dataset train split (default: train)') -group.add_argument('--val-split', metavar='NAME', default='validation', - help='dataset validation split (default: validation)') -group.add_argument('--dataset-download', action='store_true', default=False, - help='Allow download of dataset for torch/ and tfds/ datasets that support it.') -group.add_argument('--class-map', default='', type=str, metavar='FILENAME', - help='path to class to idx mapping file (default: "")') - -# Model parameters -group = parser.add_argument_group('Model parameters') -group.add_argument('--model', default='resnet50', type=str, metavar='MODEL', - help='Name of model to train (default: "resnet50")') -group.add_argument('--pretrained', action='store_true', default=False, - help='Start with pretrained version of specified network (if avail)') -group.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH', - help='Initialize model from this checkpoint (default: none)') -group.add_argument('--resume', default='', type=str, metavar='PATH', - help='Resume full model and optimizer state from checkpoint (default: none)') -group.add_argument('--no-resume-opt', action='store_true', default=False, - help='prevent resume of optimizer state when resuming model') -group.add_argument('--num-classes', type=int, default=None, metavar='N', - help='number of label classes (Model default if None)') -group.add_argument('--gp', default=None, type=str, metavar='POOL', - help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.') -group.add_argument('--img-size', type=int, default=None, metavar='N', - help='Image size (default: None => model default)') -group.add_argument('--in-chans', type=int, default=None, metavar='N', - help='Image input channels (default: None => 3)') -group.add_argument('--input-size', default=None, nargs=3, type=int, - metavar='N N N', - help='Input all image dimensions (d h w, e.g. --input-size 3 224 224), uses model default if empty') -group.add_argument('--crop-pct', default=None, type=float, - metavar='N', help='Input image center crop percent (for validation only)') -group.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', - help='Override mean pixel value of dataset') -group.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', - help='Override std deviation of dataset') -group.add_argument('--interpolation', default='', type=str, metavar='NAME', - help='Image resize interpolation type (overrides model)') -group.add_argument('-b', '--batch-size', type=int, default=128, metavar='N', - help='Input batch size for training (default: 128)') -group.add_argument('-vb', '--validation-batch-size', type=int, default=None, metavar='N', - help='Validation batch size override (default: None)') -group.add_argument('--channels-last', action='store_true', default=False, - help='Use channels_last memory layout') -group.add_argument('--fuser', default='', type=str, - help="Select jit fuser. 
One of ('', 'te', 'old', 'nvfuser')") -group.add_argument('--grad-checkpointing', action='store_true', default=False, - help='Enable gradient checkpointing through model blocks/stages') -group.add_argument('--fast-norm', default=False, action='store_true', - help='enable experimental fast-norm') -group.add_argument('--model-kwargs', nargs='*', default={}, action=utils.ParseKwargs) - -scripting_group = group.add_mutually_exclusive_group() -scripting_group.add_argument('--torchscript', dest='torchscript', action='store_true', - help='torch.jit.script the full model') -scripting_group.add_argument('--torchcompile', nargs='?', type=str, default=None, const='inductor', - help="Enable compilation w/ specified backend (default: inductor).") -scripting_group.add_argument('--aot-autograd', default=False, action='store_true', - help="Enable AOT Autograd support.") - -# Optimizer parameters -group = parser.add_argument_group('Optimizer parameters') -group.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER', - help='Optimizer (default: "sgd")') -group.add_argument('--opt-eps', default=None, type=float, metavar='EPSILON', - help='Optimizer Epsilon (default: None, use opt default)') -group.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA', - help='Optimizer Betas (default: None, use opt default)') -group.add_argument('--momentum', type=float, default=0.9, metavar='M', - help='Optimizer momentum (default: 0.9)') -group.add_argument('--weight-decay', type=float, default=2e-5, - help='weight decay (default: 2e-5)') -group.add_argument('--clip-grad', type=float, default=None, metavar='NORM', - help='Clip gradient norm (default: None, no clipping)') -group.add_argument('--clip-mode', type=str, default='norm', - help='Gradient clipping mode. 
One of ("norm", "value", "agc")') -group.add_argument('--layer-decay', type=float, default=None, - help='layer-wise learning rate decay (default: None)') -group.add_argument('--opt-kwargs', nargs='*', default={}, action=utils.ParseKwargs) - -# Learning rate schedule parameters -group = parser.add_argument_group('Learning rate schedule parameters') -group.add_argument('--sched', type=str, default='cosine', metavar='SCHEDULER', - help='LR scheduler (default: "step"') -group.add_argument('--sched-on-updates', action='store_true', default=False, - help='Apply LR scheduler step on update instead of epoch end.') -group.add_argument('--lr', type=float, default=None, metavar='LR', - help='learning rate, overrides lr-base if set (default: None)') -group.add_argument('--lr-base', type=float, default=0.1, metavar='LR', - help='base learning rate: lr = lr_base * global_batch_size / base_size') -group.add_argument('--lr-base-size', type=int, default=256, metavar='DIV', - help='base learning rate batch size (divisor, default: 256).') -group.add_argument('--lr-base-scale', type=str, default='', metavar='SCALE', - help='base learning rate vs batch_size scaling ("linear", "sqrt", based on opt if empty)') -group.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', - help='learning rate noise on/off epoch percentages') -group.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', - help='learning rate noise limit percent (default: 0.67)') -group.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', - help='learning rate noise std-dev (default: 1.0)') -group.add_argument('--lr-cycle-mul', type=float, default=1.0, metavar='MULT', - help='learning rate cycle len multiplier (default: 1.0)') -group.add_argument('--lr-cycle-decay', type=float, default=0.5, metavar='MULT', - help='amount to decay each learning rate cycle (default: 0.5)') -group.add_argument('--lr-cycle-limit', type=int, default=1, metavar='N', - help='learning rate cycle limit, cycles enabled if > 1') -group.add_argument('--lr-k-decay', type=float, default=1.0, - help='learning rate k-decay for cosine/poly (default: 1.0)') -group.add_argument('--warmup-lr', type=float, default=1e-5, metavar='LR', - help='warmup learning rate (default: 1e-5)') -group.add_argument('--min-lr', type=float, default=0, metavar='LR', - help='lower lr bound for cyclic schedulers that hit 0 (default: 0)') -group.add_argument('--epochs', type=int, default=300, metavar='N', - help='number of epochs to train (default: 300)') -group.add_argument('--epoch-repeats', type=float, default=0., metavar='N', - help='epoch repeat multiplier (number of times to repeat dataset epoch per train epoch).') -group.add_argument('--start-epoch', default=None, type=int, metavar='N', - help='manual epoch number (useful on restarts)') -group.add_argument('--decay-milestones', default=[90, 180, 270], type=int, nargs='+', metavar="MILESTONES", - help='list of decay epoch indices for multistep lr. 
must be increasing') -group.add_argument('--decay-epochs', type=float, default=90, metavar='N', - help='epoch interval to decay LR') -group.add_argument('--warmup-epochs', type=int, default=5, metavar='N', - help='epochs to warmup LR, if scheduler supports') -group.add_argument('--warmup-prefix', action='store_true', default=False, - help='Exclude warmup period from decay schedule.'), -group.add_argument('--cooldown-epochs', type=int, default=0, metavar='N', - help='epochs to cooldown LR at min_lr, after cyclic schedule ends') -group.add_argument('--patience-epochs', type=int, default=10, metavar='N', - help='patience epochs for Plateau LR scheduler (default: 10)') -group.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', - help='LR decay rate (default: 0.1)') - -# Augmentation & regularization parameters -group = parser.add_argument_group('Augmentation and regularization parameters') -group.add_argument('--no-aug', action='store_true', default=False, - help='Disable all training augmentation, override other train aug args') -group.add_argument('--scale', type=float, nargs='+', default=[0.08, 1.0], metavar='PCT', - help='Random resize scale (default: 0.08 1.0)') -group.add_argument('--ratio', type=float, nargs='+', default=[3. / 4., 4. / 3.], metavar='RATIO', - help='Random resize aspect ratio (default: 0.75 1.33)') -group.add_argument('--hflip', type=float, default=0.5, - help='Horizontal flip training aug probability') -group.add_argument('--vflip', type=float, default=0., - help='Vertical flip training aug probability') -group.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', - help='Color jitter factor (default: 0.4)') -group.add_argument('--aa', type=str, default=None, metavar='NAME', - help='Use AutoAugment policy. "v0" or "original". (default: None)'), -group.add_argument('--aug-repeats', type=float, default=0, - help='Number of augmentation repetitions (distributed training only) (default: 0)') -group.add_argument('--aug-splits', type=int, default=0, - help='Number of augmentation splits (default: 0, valid: 0 or >=2)') -group.add_argument('--jsd-loss', action='store_true', default=False, - help='Enable Jensen-Shannon Divergence + CE loss. Use with `--aug-splits`.') -group.add_argument('--bce-loss', action='store_true', default=False, - help='Enable BCE loss w/ Mixup/CutMix use.') -group.add_argument('--bce-target-thresh', type=float, default=None, - help='Threshold for binarizing softened BCE targets (default: None, disabled)') -group.add_argument('--reprob', type=float, default=0., metavar='PCT', - help='Random erase prob (default: 0.)') -group.add_argument('--remode', type=str, default='pixel', - help='Random erase mode (default: "pixel")') -group.add_argument('--recount', type=int, default=1, - help='Random erase count (default: 1)') -group.add_argument('--resplit', action='store_true', default=False, - help='Do not random erase first (clean) augmentation split') -group.add_argument('--mixup', type=float, default=0.0, - help='mixup alpha, mixup enabled if > 0. (default: 0.)') -group.add_argument('--cutmix', type=float, default=0.0, - help='cutmix alpha, cutmix enabled if > 0. 
(default: 0.)') -group.add_argument('--cutmix-minmax', type=float, nargs='+', default=None, - help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)') -group.add_argument('--mixup-prob', type=float, default=1.0, - help='Probability of performing mixup or cutmix when either/both is enabled') -group.add_argument('--mixup-switch-prob', type=float, default=0.5, - help='Probability of switching to cutmix when both mixup and cutmix enabled') -group.add_argument('--mixup-mode', type=str, default='batch', - help='How to apply mixup/cutmix params. Per "batch", "pair", or "elem"') -group.add_argument('--mixup-off-epoch', default=0, type=int, metavar='N', - help='Turn off mixup after this epoch, disabled if 0 (default: 0)') -group.add_argument('--smoothing', type=float, default=0.1, - help='Label smoothing (default: 0.1)') -group.add_argument('--train-interpolation', type=str, default='random', - help='Training interpolation (random, bilinear, bicubic default: "random")') -group.add_argument('--drop', type=float, default=0.0, metavar='PCT', - help='Dropout rate (default: 0.)') -group.add_argument('--drop-connect', type=float, default=None, metavar='PCT', - help='Drop connect rate, DEPRECATED, use drop-path (default: None)') -group.add_argument('--drop-path', type=float, default=None, metavar='PCT', - help='Drop path rate (default: None)') -group.add_argument('--drop-block', type=float, default=None, metavar='PCT', - help='Drop block rate (default: None)') - -# Batch norm parameters (only works with gen_efficientnet based models currently) -group = parser.add_argument_group('Batch norm parameters', 'Only works with gen_efficientnet based models currently.') -group.add_argument('--bn-momentum', type=float, default=None, - help='BatchNorm momentum override (if not None)') -group.add_argument('--bn-eps', type=float, default=None, - help='BatchNorm epsilon override (if not None)') -group.add_argument('--sync-bn', action='store_true', - help='Enable NVIDIA Apex or Torch synchronized BatchNorm.') -group.add_argument('--dist-bn', type=str, default='reduce', - help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")') -group.add_argument('--split-bn', action='store_true', - help='Enable separate BN layers per augmentation split.') - -# Model Exponential Moving Average -group = parser.add_argument_group('Model exponential moving average parameters') -group.add_argument('--model-ema', action='store_true', default=False, - help='Enable tracking moving average of model weights') -group.add_argument('--model-ema-force-cpu', action='store_true', default=False, - help='Force ema to be tracked on CPU, rank=0 node only. 
Disables EMA validation.') -group.add_argument('--model-ema-decay', type=float, default=0.9998, - help='decay factor for model weights moving average (default: 0.9998)') - -# Misc -group = parser.add_argument_group('Miscellaneous parameters') -group.add_argument('--seed', type=int, default=42, metavar='S', - help='random seed (default: 42)') -group.add_argument('--worker-seeding', type=str, default='all', - help='worker seed mode (default: all)') -group.add_argument('--log-interval', type=int, default=50, metavar='N', - help='how many batches to wait before logging training status') -group.add_argument('--recovery-interval', type=int, default=0, metavar='N', - help='how many batches to wait before writing recovery checkpoint') -group.add_argument('--checkpoint-hist', type=int, default=10, metavar='N', - help='number of checkpoints to keep (default: 10)') -group.add_argument('-j', '--workers', type=int, default=4, metavar='N', - help='how many training processes to use (default: 4)') -group.add_argument('--save-images', action='store_true', default=False, - help='save images of input bathes every log interval for debugging') -group.add_argument('--amp', action='store_true', default=False, - help='use NVIDIA Apex AMP or Native AMP for mixed precision training') -group.add_argument('--amp-dtype', default='float16', type=str, - help='lower precision AMP dtype (default: float16)') -group.add_argument('--amp-impl', default='native', type=str, - help='AMP impl to use, "native" or "apex" (default: native)') -group.add_argument('--no-ddp-bb', action='store_true', default=False, - help='Force broadcast buffers for native DDP to off.') -group.add_argument('--pin-mem', action='store_true', default=False, - help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') -group.add_argument('--no-prefetcher', action='store_true', default=False, - help='disable fast prefetcher') -group.add_argument('--output', default='', type=str, metavar='PATH', - help='path to output folder (default: none, current dir)') -group.add_argument('--experiment', default='', type=str, metavar='NAME', - help='name of train experiment, name of sub-folder for output') -group.add_argument('--eval-metric', default='top1', type=str, metavar='EVAL_METRIC', - help='Best metric (default: "top1"') -group.add_argument('--tta', type=int, default=0, metavar='N', - help='Test/inference time augmentation (oversampling) factor. 
0=None (default: 0)') -group.add_argument("--local_rank", default=0, type=int) -group.add_argument('--use-multi-epochs-loader', action='store_true', default=False, - help='use the multi-epochs-loader to save time at the beginning of every epoch') -group.add_argument('--log-wandb', action='store_true', default=False, - help='log training and validation metrics to wandb') - -group = parser.add_argument_group('INC pruning/distillation parameters') -group.add_argument('--do-prune', action='store_true', default=False, - help='do the pruning') -group.add_argument('--do-distillation', action='store_true', default=False, - help='do distillation') -group.add_argument('--distillation-loss-weight', type=float, default=1.0, - help='the coefficient of teacher-student distillation loss') -group.add_argument('--update-frequency-on-step', type=int, default=2000, - help='frequency of doing pruning') -group.add_argument('--target-sparsity', type=float, default=0.9, - help='the coefficient of teacher-student distillation loss') -group.add_argument('--pruning-pattern', type=str, default="1x1", - help='the coefficient of teacher-student distillation loss') -group.add_argument('--no-cuda', action='store_true', default=False, - help='user defined device setter.') -group.add_argument('--distributed', action='store_true', default=False, - help='Whether do distributed training.') -group.add_argument('--dist_backend', type=str, default="nccl", - help='The backend to be used for distributed training.') - -def _parse_args(): - # Do we have a config file to parse? - args_config, remaining = config_parser.parse_known_args() - if args_config.config: - with open(args_config.config, 'r') as f: - cfg = yaml.safe_load(f) - parser.set_defaults(**cfg) - - # The main arg parser parses the rest of the args, the usual - # defaults will have been overridden if config file specified. - args = parser.parse_args(remaining) - - # Cache the args as a text string to save them in the output dir later - args_text = yaml.safe_dump(args.__dict__, default_flow_style=False) - return args, args_text - - -def main(): - utils.setup_default_logging() - args, args_text = _parse_args() - - if torch.cuda.is_available(): - torch.backends.cuda.matmul.allow_tf32 = True - torch.backends.cudnn.benchmark = True - - args.prefetcher = not args.no_prefetcher - device = utils.init_distributed_device(args) - - # user define args.no_cuda, even if the machine have GPU, user can still choose CPU to train. - if args.no_cuda: - device = torch.device("cpu") - if torch.cuda.is_available() and device.type == "cpu": - _logger.warning(f"Your device has cuda enabled, but your model will be pruned on cpu.") - device = torch.device("cpu") - - if args.distributed: - _logger.info( - 'Training in distributed mode with multiple processes, 1 device per process.' - f'Process {args.rank}, total {args.world_size}, device {args.device}.') - else: - _logger.info(f'Training with a single process on 1 device ({args.device}).') - assert args.rank >= 0 - - # resolve AMP arguments based on PyTorch / Apex availability - use_amp = None - amp_dtype = torch.float16 - if args.amp: - if args.amp_impl == 'apex': - assert has_apex, 'AMP impl specified as APEX but APEX is not installed.' - use_amp = 'apex' - assert args.amp_dtype == 'float16' - else: - assert has_native_amp, 'Please update PyTorch to a version with native AMP (or use APEX).' 
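The two-stage parsing in `_parse_args()` above (a bare `--config` parser feeding `parser.set_defaults(**cfg)`) is what lets a YAML file preset any argument of this script before the command line is parsed. A minimal standalone sketch of that mechanism, using only a toy subset of the real options:

```python
import argparse
import yaml  # PyYAML, the same dependency the script already imports

# Stage 1: a bare parser that only knows about --config (mirrors config_parser above).
config_parser = argparse.ArgumentParser(add_help=False)
config_parser.add_argument('-c', '--config', default='', type=str)

# Stage 2: the real parser with defaults (toy subset of the options in this script).
parser = argparse.ArgumentParser()
parser.add_argument('--model', default='resnet50', type=str)
parser.add_argument('--target-sparsity', default=0.9, type=float)

def parse(argv=None):
    args_config, remaining = config_parser.parse_known_args(argv)
    if args_config.config:
        with open(args_config.config) as f:
            # YAML keys use the argparse dest names, e.g. "target_sparsity: 0.8"
            parser.set_defaults(**yaml.safe_load(f))
    # Flags still present in `remaining` win over config-file values.
    return parser.parse_args(remaining)

if __name__ == '__main__':
    print(parse(['--model', 'resnet18']))
```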
- use_amp = 'native' - assert args.amp_dtype in ('float16', 'bfloat16') - if args.amp_dtype == 'bfloat16': - amp_dtype = torch.bfloat16 - - utils.random_seed(args.seed, args.rank) - - if args.fuser: - utils.set_jit_fuser(args.fuser) - if args.fast_norm: - set_fast_norm() - - in_chans = 3 - if args.in_chans is not None: - in_chans = args.in_chans - elif args.input_size is not None: - in_chans = args.input_size[0] - - model = create_model( - args.model, - pretrained=args.pretrained, - in_chans=in_chans, - num_classes=args.num_classes, - drop_rate=args.drop, - drop_path_rate=args.drop_path, - drop_block_rate=args.drop_block, - global_pool=args.gp, - bn_momentum=args.bn_momentum, - bn_eps=args.bn_eps, - scriptable=args.torchscript, - checkpoint_path=args.initial_checkpoint, - **args.model_kwargs, - ) - # add teacher model for distillation: - if args.do_distillation: - teacher_model = create_model( - args.model, - pretrained=args.pretrained, - in_chans=in_chans, - num_classes=args.num_classes, - drop_rate=args.drop, - drop_path_rate=args.drop_path, - drop_block_rate=args.drop_block, - global_pool=args.gp, - bn_momentum=args.bn_momentum, - bn_eps=args.bn_eps, - scriptable=args.torchscript, - checkpoint_path=args.initial_checkpoint, - ) - - if args.num_classes is None: - assert hasattr(model, 'num_classes'), 'Model must have `num_classes` attr if not set on cmd line/config.' - args.num_classes = model.num_classes # FIXME handle model default vs config num_classes more elegantly - - if args.grad_checkpointing: - model.set_grad_checkpointing(enable=True) - - if utils.is_primary(args): - _logger.info( - f'Model {safe_model_name(args.model)} created, param count:{sum([m.numel() for m in model.parameters()])}') - - data_config = resolve_data_config(vars(args), model=model, verbose=utils.is_primary(args)) - - # setup augmentation batch splits for contrastive loss or split bn - num_aug_splits = 0 - if args.aug_splits > 0: - assert args.aug_splits > 1, 'A split of 1 makes no sense' - num_aug_splits = args.aug_splits - - # enable split bn (separate bn stats per batch-portion) - if args.split_bn: - assert num_aug_splits > 1 or args.resplit - model = convert_splitbn_model(model, max(num_aug_splits, 2)) - - # move model to GPU, enable channels last layout if set - model.to(device=device) - if args.do_distillation: teacher_model.to(device=device) - - if args.channels_last: - model.to(memory_format=torch.channels_last) - - # setup synchronized BatchNorm for distributed training - if args.distributed and args.sync_bn: - args.dist_bn = '' # disable dist_bn when sync BN active - assert not args.split_bn - if has_apex and use_amp == 'apex': - # Apex SyncBN used with Apex AMP - # WARNING this won't currently work with models using BatchNormAct2d - model = convert_syncbn_model(model) - else: - model = convert_sync_batchnorm(model) - if utils.is_primary(args): - _logger.info( - 'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using ' - 'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.') - - if args.torchscript: - assert not use_amp == 'apex', 'Cannot use APEX AMP with torchscripted model' - assert not args.sync_bn, 'Cannot use SyncBatchNorm with torchscripted model' - model = torch.jit.script(model) - elif args.torchcompile: - # FIXME dynamo might need move below DDP wrapping? TBD - assert has_compile, 'A version of torch w/ torch.compile() is required for --compile, possibly a nightly.' 
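The AMP plumbing a few lines below (`amp_autocast = partial(torch.autocast, ...)` plus timm's `NativeScaler`) follows the standard autocast/grad-scaler pattern. A plain-PyTorch sketch of one training step under those assumptions, with the model, loss function, optimizer, and tensors supplied by the caller:

```python
from functools import partial
import torch

def amp_train_step(model, loss_fn, optimizer, inputs, targets,
                   device_type='cuda', amp_dtype=torch.float16):
    # Equivalent of `amp_autocast = partial(torch.autocast, ...)` in the script.
    amp_autocast = partial(torch.autocast, device_type=device_type, dtype=amp_dtype)
    # Plain-PyTorch stand-in for timm's NativeScaler; scaling is only needed
    # for float16 on CUDA, and becomes a no-op otherwise.
    scaler = torch.cuda.amp.GradScaler(
        enabled=(device_type == 'cuda' and amp_dtype == torch.float16))

    optimizer.zero_grad()
    with amp_autocast():
        loss = loss_fn(model(inputs), targets)
    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
    return loss.detach()
```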
- torch._dynamo.reset() - model = torch.compile(model, backend=args.torchcompile) - elif args.aot_autograd: - assert has_functorch, "functorch is needed for --aot-autograd" - model = memory_efficient_fusion(model) - - if not args.lr: - global_batch_size = args.batch_size * args.world_size - batch_ratio = global_batch_size / args.lr_base_size - if not args.lr_base_scale: - on = args.opt.lower() - args.lr_base_scale = 'sqrt' if any([o in on for o in ('ada', 'lamb')]) else 'linear' - if args.lr_base_scale == 'sqrt': - batch_ratio = batch_ratio ** 0.5 - args.lr = args.lr_base * batch_ratio - if utils.is_primary(args): - _logger.info( - f'Learning rate ({args.lr}) calculated from base learning rate ({args.lr_base}) ' - f'and global batch size ({global_batch_size}) with {args.lr_base_scale} scaling.') - - optimizer = create_optimizer_v2( - model, - **optimizer_kwargs(cfg=args), - **args.opt_kwargs, - ) - - # setup automatic mixed-precision (AMP) loss scaling and op casting - amp_autocast = suppress # do nothing - loss_scaler = None - if use_amp == 'apex': - assert device.type == 'cuda' - model, optimizer = amp.initialize(model, optimizer, opt_level='O1') - loss_scaler = ApexScaler() - if utils.is_primary(args): - _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.') - elif use_amp == 'native': - amp_autocast = partial(torch.autocast, device_type=device.type, dtype=amp_dtype) - if device.type == 'cuda': - loss_scaler = NativeScaler() - if utils.is_primary(args): - _logger.info('Using native Torch AMP. Training in mixed precision.') - else: - if utils.is_primary(args): - _logger.info('AMP not enabled. Training in float32.') - - # optionally resume from a checkpoint - resume_epoch = None - if args.resume: - resume_epoch = resume_checkpoint( - model, - args.resume, - optimizer=None if args.no_resume_opt else optimizer, - loss_scaler=None if args.no_resume_opt else loss_scaler, - log_info=utils.is_primary(args), - ) - - # setup exponential moving average of model weights, SWA could be used here too - model_ema = None - if args.model_ema: - # Important to create EMA model after cuda(), DP wrapper, and AMP but before DDP wrapper - model_ema = utils.ModelEmaV2( - model, decay=args.model_ema_decay, device='cpu' if args.model_ema_force_cpu else None) - if args.resume: - load_checkpoint(model_ema.module, args.resume, use_ema=True) - - # setup distributed training - if args.distributed: - if has_apex and use_amp == 'apex': - # Apex DDP preferred unless native amp is activated - if utils.is_primary(args): - _logger.info("Using NVIDIA APEX DistributedDataParallel.") - model = ApexDDP(model, delay_allreduce=True) - else: - if utils.is_primary(args): - _logger.info("Using native Torch DistributedDataParallel.") - model = NativeDDP(model, device_ids=None if args.no_cuda else [device], broadcast_buffers=not args.no_ddp_bb) - # NOTE: EMA model does not need to be wrapped by DDP - - # create the train and eval datasets - if args.data and not args.data_dir: - args.data_dir = args.data - dataset_train = create_dataset( - args.dataset, - root=args.data_dir, - split=args.train_split, - is_training=True, - class_map=args.class_map, - download=args.dataset_download, - batch_size=args.batch_size, - seed=args.seed, - repeats=args.epoch_repeats, - ) - - dataset_eval = create_dataset( - args.dataset, - root=args.data_dir, - split=args.val_split, - is_training=False, - class_map=args.class_map, - download=args.dataset_download, - batch_size=args.batch_size, - ) - - # setup mixup / cutmix - collate_fn = None - 
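Further down, this script wires Intel Neural Compressor pruning into the timm loop via `prepare_compression`, `WeightPruningConfig`, and callbacks placed around `optimizer.step()`. Stripped of the timm-specific pieces (AMP, EMA, distillation, logging), the flow reduces to the skeleton below; the model, loader, optimizer, loss function, and step bounds are caller-supplied placeholders:

```python
from neural_compressor.training import WeightPruningConfig, prepare_compression

def prune_during_training(model, loader, optimizer, loss_fn, num_epochs, start_step, end_step):
    """Skeleton of the pruning flow used in this script; all arguments are placeholders."""
    configs = WeightPruningConfig(
        [{"pruning_type": "snip_momentum", "pruning_scope": "global",
          "pruning_op_types": ["Conv2d"], "excluded_op_names": ["head", "fc"]}],
        target_sparsity=0.9, pattern="1x1",
        start_step=start_step, end_step=end_step,
    )
    compression_manager = prepare_compression(model=model, confs=configs)
    compression_manager.callbacks.on_train_begin()
    model = compression_manager.model   # INC-wrapped model drives the pruning schedule

    for epoch in range(num_epochs):
        for batch_idx, (inputs, targets) in enumerate(loader):
            compression_manager.callbacks.on_step_begin(batch_id=batch_idx)
            loss = loss_fn(model(inputs), targets)
            optimizer.zero_grad()
            loss.backward()
            compression_manager.callbacks.on_before_optimizer_step()
            optimizer.step()
            compression_manager.callbacks.on_after_optimizer_step()
    return model
```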
mixup_fn = None - mixup_active = args.mixup > 0 or args.cutmix > 0. or args.cutmix_minmax is not None - if mixup_active: - mixup_args = dict( - mixup_alpha=args.mixup, - cutmix_alpha=args.cutmix, - cutmix_minmax=args.cutmix_minmax, - prob=args.mixup_prob, - switch_prob=args.mixup_switch_prob, - mode=args.mixup_mode, - label_smoothing=args.smoothing, - num_classes=args.num_classes - ) - if args.prefetcher: - assert not num_aug_splits # collate conflict (need to support deinterleaving in collate mixup) - collate_fn = FastCollateMixup(**mixup_args) - else: - mixup_fn = Mixup(**mixup_args) - - # wrap dataset in AugMix helper - if num_aug_splits > 1: - dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits) - - # create data loaders w/ augmentation pipeiine - train_interpolation = args.train_interpolation - if args.no_aug or not train_interpolation: - train_interpolation = data_config['interpolation'] - loader_train = create_loader( - dataset_train, - input_size=data_config['input_size'], - batch_size=args.batch_size, - is_training=True, - use_prefetcher=args.prefetcher, - no_aug=args.no_aug, - re_prob=args.reprob, - re_mode=args.remode, - re_count=args.recount, - re_split=args.resplit, - scale=args.scale, - ratio=args.ratio, - hflip=args.hflip, - vflip=args.vflip, - color_jitter=args.color_jitter, - auto_augment=args.aa, - num_aug_repeats=args.aug_repeats, - num_aug_splits=num_aug_splits, - interpolation=train_interpolation, - mean=data_config['mean'], - std=data_config['std'], - num_workers=args.workers, - distributed=args.distributed, - collate_fn=collate_fn, - pin_memory=args.pin_mem, - device=device, - use_multi_epochs_loader=args.use_multi_epochs_loader, - worker_seeding=args.worker_seeding, - ) - - eval_workers = args.workers - if args.distributed and ('tfds' in args.dataset or 'wds' in args.dataset): - # FIXME reduces validation padding issues when using TFDS, WDS w/ workers and distributed training - eval_workers = min(2, args.workers) - loader_eval = create_loader( - dataset_eval, - input_size=data_config['input_size'], - batch_size=args.validation_batch_size or args.batch_size, - is_training=False, - use_prefetcher=args.prefetcher, - interpolation=data_config['interpolation'], - mean=data_config['mean'], - std=data_config['std'], - num_workers=eval_workers, - distributed=args.distributed, - crop_pct=data_config['crop_pct'], - pin_memory=args.pin_mem, - device=device, - ) - - # setup loss function - if args.jsd_loss: - assert num_aug_splits > 1 # JSD only valid with aug splits set - train_loss_fn = JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing) - elif mixup_active: - # smoothing is handled with mixup target transform which outputs sparse, soft targets - if args.bce_loss: - train_loss_fn = BinaryCrossEntropy(target_threshold=args.bce_target_thresh) - else: - train_loss_fn = SoftTargetCrossEntropy() - elif args.smoothing: - if args.bce_loss: - train_loss_fn = BinaryCrossEntropy(smoothing=args.smoothing, target_threshold=args.bce_target_thresh) - else: - train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing) - else: - train_loss_fn = nn.CrossEntropyLoss() - train_loss_fn = train_loss_fn.to(device=device) - validate_loss_fn = nn.CrossEntropyLoss().to(device=device) - - # setup checkpoint saver and eval metric tracking - eval_metric = args.eval_metric - best_metric = None - best_epoch = None - saver = None - output_dir = None - if utils.is_primary(args): - if args.experiment: - exp_name = args.experiment - else: - exp_name = '-'.join([ - 
datetime.now().strftime("%Y%m%d-%H%M%S"), - safe_model_name(args.model), - str(data_config['input_size'][-1]) - ]) - output_dir = utils.get_outdir(args.output if args.output else './output/train', exp_name) - decreasing = True if eval_metric == 'loss' else False - saver = utils.CheckpointSaver( - model=model, - optimizer=optimizer, - args=args, - model_ema=model_ema, - amp_scaler=loss_scaler, - checkpoint_dir=output_dir, - recovery_dir=output_dir, - decreasing=decreasing, - max_history=args.checkpoint_hist - ) - with open(os.path.join(output_dir, 'args.yaml'), 'w') as f: - f.write(args_text) - - if utils.is_primary(args) and args.log_wandb: - if has_wandb: - wandb.init(project=args.experiment, config=args) - else: - _logger.warning( - "You've requested to log metrics to wandb but package not found. " - "Metrics not being logged to wandb, try `pip install wandb`") - - # setup learning rate schedule and starting epoch - updates_per_epoch = len(loader_train) - lr_scheduler, num_epochs = create_scheduler_v2( - optimizer, - **scheduler_kwargs(args), - updates_per_epoch=updates_per_epoch, - ) - start_epoch = 0 - if args.start_epoch is not None: - # a specified start_epoch will always override the resume epoch - start_epoch = args.start_epoch - elif resume_epoch is not None: - start_epoch = resume_epoch - if lr_scheduler is not None and start_epoch > 0: - if args.sched_on_updates: - lr_scheduler.step_update(start_epoch * updates_per_epoch) - else: - lr_scheduler.step(start_epoch) - - if utils.is_primary(args): - _logger.info( - f'Scheduled epochs: {num_epochs}. LR stepped per {"epoch" if lr_scheduler.t_in_epochs else "update"}.') - - # ***INC Pruning API Preparation*** - - # Step 1: Load the INC packages - from neural_compressor.training import prepare_compression - from neural_compressor.training import WeightPruningConfig - # Step 2: epochs <==> steps conversion, use steps to initialize the pruning object - num_iterations = len(loader_train) - num_warmup_steps = int(args.warmup_epochs * num_iterations) - # Step 3: args.warmup_epochs, args.cooldown_epochs => pruner.start(end)_steps - if args.do_prune: - start_step = num_warmup_steps - end_step = (num_epochs - args.cooldown_epochs) * num_iterations - else: - # set start_step/end_step to a value which pruner can never arrive - start_step = int(num_iterations * (args.epochs + 1)) - end_step = start_step - # Step 4: Define the pruning config - pruning_configs=[ - { - "pruning_type": "snip_momentum", - "pruning_scope": "global", - "sparsity_decay_type": "exp", - "op_names": ["layer"], - "excluded_op_names": ["head", "fc", ".downsample"], # totally 48 conv-layers will be pruned - "pruning_op_types": ["Conv2d"], - "max_sparsity_ratio_per_op": 0.98 - } - ] - configs = WeightPruningConfig( - pruning_configs, - target_sparsity=args.target_sparsity, - pattern=args.pruning_pattern, - pruning_frequency=args.update_frequency_on_step, - start_step=start_step, - end_step=end_step - ) - # Step 5: Define INC object for pruning - compression_manager = prepare_compression(model=model, confs=configs) - compression_manager.callbacks.on_train_begin() - model = compression_manager.model - - _logger.info("Before pruning, obtain the model's validation performance") - eval_metrics = validate( - model, - loader_eval, - validate_loss_fn, - args, - device=device, - amp_autocast=amp_autocast, - ) - _logger.info(f"Model accuracy before pruning is {eval_metrics}") - - try: - for epoch in range(start_epoch, num_epochs): - if hasattr(dataset_train, 'set_epoch'): - 
dataset_train.set_epoch(epoch) - elif args.distributed and hasattr(loader_train.sampler, 'set_epoch'): - loader_train.sampler.set_epoch(epoch) - - train_metrics = train_one_epoch( - epoch, - model, - teacher_model, - loader_train, - optimizer, - train_loss_fn, - args, - device=device, - lr_scheduler=lr_scheduler, - saver=saver, - output_dir=output_dir, - amp_autocast=amp_autocast, - loss_scaler=loss_scaler, - model_ema=model_ema, - mixup_fn=mixup_fn, - compression_manager=compression_manager, - ) - - if args.distributed and args.dist_bn in ('broadcast', 'reduce'): - if utils.is_primary(args): - _logger.info("Distributing BatchNorm running means and vars") - utils.distribute_bn(model, args.world_size, args.dist_bn == 'reduce') - - eval_metrics = validate( - model, - loader_eval, - validate_loss_fn, - args, - device=device, - amp_autocast=amp_autocast, - ) - - if model_ema is not None and not args.model_ema_force_cpu: - if args.distributed and args.dist_bn in ('broadcast', 'reduce'): - utils.distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce') - - ema_eval_metrics = validate( - model_ema.module, - loader_eval, - validate_loss_fn, - args, - device=device, - amp_autocast=amp_autocast, - log_suffix=' (EMA)', - ) - eval_metrics = ema_eval_metrics - - if output_dir is not None: - lrs = [param_group['lr'] for param_group in optimizer.param_groups] - utils.update_summary( - epoch, - train_metrics, - eval_metrics, - filename=os.path.join(output_dir, 'summary.csv'), - lr=sum(lrs) / len(lrs), - write_header=best_metric is None, - log_wandb=args.log_wandb and has_wandb, - ) - - if saver is not None: - # save proper checkpoint with eval metric - save_metric = eval_metrics[eval_metric] - best_metric, best_epoch = saver.save_checkpoint(epoch, metric=save_metric) - - if lr_scheduler is not None: - # step LR for next epoch - lr_scheduler.step(epoch + 1, eval_metrics[eval_metric]) - - except KeyboardInterrupt: - pass - - if best_metric is not None: - _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch)) - - -def train_one_epoch( - epoch, - model, - teacher_model, - loader, - optimizer, - loss_fn, - args, - device=None, - lr_scheduler=None, - saver=None, - output_dir=None, - amp_autocast=suppress, - loss_scaler=None, - model_ema=None, - mixup_fn=None, - compression_manager=None, -): - if args.mixup_off_epoch and epoch >= args.mixup_off_epoch: - if args.prefetcher and loader.mixup_enabled: - loader.mixup_enabled = False - elif mixup_fn is not None: - mixup_fn.mixup_enabled = False - - second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order - batch_time_m = utils.AverageMeter() - data_time_m = utils.AverageMeter() - losses_m = utils.AverageMeter() - - model.train() - - end = time.time() - num_batches_per_epoch = len(loader) - last_idx = num_batches_per_epoch - 1 - num_updates = epoch * num_batches_per_epoch - for batch_idx, (input, target) in enumerate(loader): - compression_manager.callbacks.on_step_begin(batch_id = batch_idx) - last_batch = batch_idx == last_idx - data_time_m.update(time.time() - end) - if not args.prefetcher: - input, target = input.to(device), target.to(device) - if mixup_fn is not None: - input, target = mixup_fn(input, target) - if args.channels_last: - input = input.contiguous(memory_format=torch.channels_last) - - with amp_autocast(): - output = model(input) - loss = loss_fn(output, target) - - # use distillation - if args.do_distillation: - with torch.no_grad(): - teacher_output = teacher_model(input) - 
distillation_loss = args.distillation_loss_weight * get_loss_one_logit(output, teacher_output) - loss += distillation_loss - - if not args.distributed: - losses_m.update(loss.item(), input.size(0)) - - optimizer.zero_grad() - if loss_scaler is not None: - loss_scaler( - loss, optimizer, - clip_grad=args.clip_grad, - clip_mode=args.clip_mode, - parameters=model_parameters(model, exclude_head='agc' in args.clip_mode), - create_graph=second_order - ) - else: - loss.backward(create_graph=second_order) - if args.clip_grad is not None: - utils.dispatch_clip_grad( - model_parameters(model, exclude_head='agc' in args.clip_mode), - value=args.clip_grad, - mode=args.clip_mode - ) - compression_manager.callbacks.on_before_optimizer_step() - optimizer.step() - compression_manager.callbacks.on_after_optimizer_step() - - if model_ema is not None: - model_ema.update(model) - - if device.type == 'cuda': - torch.cuda.synchronize() - - num_updates += 1 - batch_time_m.update(time.time() - end) - if last_batch or batch_idx % args.log_interval == 0: - lrl = [param_group['lr'] for param_group in optimizer.param_groups] - lr = sum(lrl) / len(lrl) - - if args.distributed: - reduced_loss = utils.reduce_tensor(loss.data, args.world_size) - losses_m.update(reduced_loss.item(), input.size(0)) - - if utils.is_primary(args): - _logger.info( - 'Train: {} [{:>4d}/{} ({:>3.0f}%)] ' - 'Loss: {loss.val:#.4g} ({loss.avg:#.3g}) ' - 'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s ' - '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' - 'LR: {lr:.3e} ' - 'Data: {data_time.val:.3f} ({data_time.avg:.3f})'.format( - epoch, - batch_idx, len(loader), - 100. * batch_idx / last_idx, - loss=losses_m, - batch_time=batch_time_m, - rate=input.size(0) * args.world_size / batch_time_m.val, - rate_avg=input.size(0) * args.world_size / batch_time_m.avg, - lr=lr, - data_time=data_time_m) - ) - - if args.save_images and output_dir: - torchvision.utils.save_image( - input, - os.path.join(output_dir, 'train-batch-%d.jpg' % batch_idx), - padding=0, - normalize=True - ) - - if saver is not None and args.recovery_interval and ( - last_batch or (batch_idx + 1) % args.recovery_interval == 0): - saver.save_recovery(epoch, batch_idx=batch_idx) - - if lr_scheduler is not None: - lr_scheduler.step_update(num_updates=num_updates, metric=losses_m.avg) - - end = time.time() - # end for - - if hasattr(optimizer, 'sync_lookahead'): - optimizer.sync_lookahead() - - return OrderedDict([('loss', losses_m.avg)]) - - -def validate( - model, - loader, - loss_fn, - args, - device=None, - amp_autocast=suppress, - log_suffix='' -): - batch_time_m = utils.AverageMeter() - losses_m = utils.AverageMeter() - top1_m = utils.AverageMeter() - top5_m = utils.AverageMeter() - - model.eval() - - end = time.time() - last_idx = len(loader) - 1 - with torch.no_grad(): - for batch_idx, (input, target) in enumerate(loader): - last_batch = batch_idx == last_idx - if not args.prefetcher: - input = input.to(device) - target = target.to(device) - if args.channels_last: - input = input.contiguous(memory_format=torch.channels_last) - - with amp_autocast(): - output = model(input) - if isinstance(output, (tuple, list)): - output = output[0] - - # augmentation reduction - reduce_factor = args.tta - if reduce_factor > 1: - output = output.unfold(0, reduce_factor, reduce_factor).mean(dim=2) - target = target[0:target.size(0):reduce_factor] - - loss = loss_fn(output, target) - acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) - - if args.distributed: - reduced_loss = 
utils.reduce_tensor(loss.data, args.world_size) - acc1 = utils.reduce_tensor(acc1, args.world_size) - acc5 = utils.reduce_tensor(acc5, args.world_size) - else: - reduced_loss = loss.data - - if device.type == 'cuda': - torch.cuda.synchronize() - - losses_m.update(reduced_loss.item(), input.size(0)) - top1_m.update(acc1.item(), output.size(0)) - top5_m.update(acc5.item(), output.size(0)) - - batch_time_m.update(time.time() - end) - end = time.time() - if utils.is_primary(args) and (last_batch or batch_idx % args.log_interval == 0): - log_name = 'Test' + log_suffix - _logger.info( - '{0}: [{1:>4d}/{2}] ' - 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) ' - 'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) ' - 'Acc@1: {top1.val:>7.4f} ({top1.avg:>7.4f}) ' - 'Acc@5: {top5.val:>7.4f} ({top5.avg:>7.4f})'.format( - log_name, batch_idx, last_idx, - batch_time=batch_time_m, - loss=losses_m, - top1=top1_m, - top5=top5_m) - ) - - metrics = OrderedDict([('loss', losses_m.avg), ('top1', top1_m.avg), ('top5', top5_m.avg)]) - - return metrics - - -if __name__ == '__main__': - main() +#!/usr/bin/env python3 +""" ImageNet Training Script +This is intended to be a lean and easily modifiable ImageNet training script that reproduces ImageNet +training results with some of the latest networks and training techniques. It favours canonical PyTorch +and standard Python style over trying to be able to 'do it all.' That said, it offers quite a few speed +and training result improvements over the usual PyTorch example scripts. Repurpose as you see fit. +This script was started from an early version of the PyTorch ImageNet example +(https://github.com/pytorch/examples/tree/master/imagenet) +NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples +(https://github.com/NVIDIA/apex/tree/master/examples/imagenet) +Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) +""" +import argparse +import logging +import os +import time +from collections import OrderedDict +from contextlib import suppress +from datetime import datetime +from functools import partial +import sys +sys.path.append("./") + +import torch +import torch.nn as nn +from torch.nn import functional as F +import torchvision.utils +import yaml +from torch.nn.parallel import DistributedDataParallel as NativeDDP + +from timm import utils +from timm.data import create_dataset, create_loader, resolve_data_config, Mixup, FastCollateMixup, AugMixDataset +from timm.layers import convert_splitbn_model, convert_sync_batchnorm, set_fast_norm +from timm.loss import JsdCrossEntropy, SoftTargetCrossEntropy, BinaryCrossEntropy, LabelSmoothingCrossEntropy +from timm.models import create_model, safe_model_name, resume_checkpoint, load_checkpoint, model_parameters +from timm.optim import create_optimizer_v2, optimizer_kwargs +from timm.scheduler import create_scheduler_v2, scheduler_kwargs +from timm.utils import ApexScaler, NativeScaler + +try: + from apex import amp + from apex.parallel import DistributedDataParallel as ApexDDP + from apex.parallel import convert_syncbn_model + has_apex = True +except ImportError: + has_apex = False + +has_native_amp = False +try: + if getattr(torch.cuda.amp, 'autocast') is not None: + has_native_amp = True +except AttributeError: + pass + +try: + import wandb + has_wandb = True +except ImportError: + has_wandb = False + +try: + from functorch.compile import memory_efficient_fusion + has_functorch = True +except ImportError as e: + has_functorch = False + +has_compile = hasattr(torch, 'compile') + +def 
get_loss_one_logit(student_logit, teacher_logit): + t = 2.0 # distillation temperature + return F.kl_div( + input=F.log_softmax(student_logit / t, dim=-1), + target=F.softmax(teacher_logit / t, dim=-1), + reduction="batchmean" + ) * (t ** 2) + +_logger = logging.getLogger('train') + +# The first arg parser parses out only the --config argument, this argument is used to +# load a yaml file containing key-values that override the defaults for the main parser below +config_parser = parser = argparse.ArgumentParser(description='Training Config', add_help=False) +parser.add_argument('-c', '--config', default='', type=str, metavar='FILE', + help='YAML config file specifying default arguments') + + +parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') + +# Dataset parameters +group = parser.add_argument_group('Dataset parameters') +# Keep this argument outside the dataset group because it is positional. +parser.add_argument('data', nargs='?', metavar='DIR', const=None, + help='path to dataset (positional is *deprecated*, use --data-dir)') +parser.add_argument('--data-dir', metavar='DIR', + help='path to dataset (root dir)') +parser.add_argument('--dataset', metavar='NAME', default='', + help='dataset type + name ("/") (default: ImageFolder or ImageTar if empty)') +group.add_argument('--train-split', metavar='NAME', default='train', + help='dataset train split (default: train)') +group.add_argument('--val-split', metavar='NAME', default='validation', + help='dataset validation split (default: validation)') +group.add_argument('--dataset-download', action='store_true', default=False, + help='Allow download of dataset for torch/ and tfds/ datasets that support it.') +group.add_argument('--class-map', default='', type=str, metavar='FILENAME', + help='path to class to idx mapping file (default: "")') + +# Model parameters +group = parser.add_argument_group('Model parameters') +group.add_argument('--model', default='resnet50', type=str, metavar='MODEL', + help='Name of model to train (default: "resnet50")') +group.add_argument('--pretrained', action='store_true', default=False, + help='Start with pretrained version of specified network (if avail)') +group.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH', + help='Initialize model from this checkpoint (default: none)') +group.add_argument('--resume', default='', type=str, metavar='PATH', + help='Resume full model and optimizer state from checkpoint (default: none)') +group.add_argument('--no-resume-opt', action='store_true', default=False, + help='prevent resume of optimizer state when resuming model') +group.add_argument('--num-classes', type=int, default=None, metavar='N', + help='number of label classes (Model default if None)') +group.add_argument('--gp', default=None, type=str, metavar='POOL', + help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.') +group.add_argument('--img-size', type=int, default=None, metavar='N', + help='Image size (default: None => model default)') +group.add_argument('--in-chans', type=int, default=None, metavar='N', + help='Image input channels (default: None => 3)') +group.add_argument('--input-size', default=None, nargs=3, type=int, + metavar='N N N', + help='Input all image dimensions (d h w, e.g. 
--input-size 3 224 224), uses model default if empty') +group.add_argument('--crop-pct', default=None, type=float, + metavar='N', help='Input image center crop percent (for validation only)') +group.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN', + help='Override mean pixel value of dataset') +group.add_argument('--std', type=float, nargs='+', default=None, metavar='STD', + help='Override std deviation of dataset') +group.add_argument('--interpolation', default='', type=str, metavar='NAME', + help='Image resize interpolation type (overrides model)') +group.add_argument('-b', '--batch-size', type=int, default=128, metavar='N', + help='Input batch size for training (default: 128)') +group.add_argument('-vb', '--validation-batch-size', type=int, default=None, metavar='N', + help='Validation batch size override (default: None)') +group.add_argument('--channels-last', action='store_true', default=False, + help='Use channels_last memory layout') +group.add_argument('--fuser', default='', type=str, + help="Select jit fuser. One of ('', 'te', 'old', 'nvfuser')") +group.add_argument('--grad-checkpointing', action='store_true', default=False, + help='Enable gradient checkpointing through model blocks/stages') +group.add_argument('--fast-norm', default=False, action='store_true', + help='enable experimental fast-norm') +group.add_argument('--model-kwargs', nargs='*', default={}, action=utils.ParseKwargs) + +scripting_group = group.add_mutually_exclusive_group() +scripting_group.add_argument('--torchscript', dest='torchscript', action='store_true', + help='torch.jit.script the full model') +scripting_group.add_argument('--torchcompile', nargs='?', type=str, default=None, const='inductor', + help="Enable compilation w/ specified backend (default: inductor).") +scripting_group.add_argument('--aot-autograd', default=False, action='store_true', + help="Enable AOT Autograd support.") + +# Optimizer parameters +group = parser.add_argument_group('Optimizer parameters') +group.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER', + help='Optimizer (default: "sgd")') +group.add_argument('--opt-eps', default=None, type=float, metavar='EPSILON', + help='Optimizer Epsilon (default: None, use opt default)') +group.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA', + help='Optimizer Betas (default: None, use opt default)') +group.add_argument('--momentum', type=float, default=0.9, metavar='M', + help='Optimizer momentum (default: 0.9)') +group.add_argument('--weight-decay', type=float, default=2e-5, + help='weight decay (default: 2e-5)') +group.add_argument('--clip-grad', type=float, default=None, metavar='NORM', + help='Clip gradient norm (default: None, no clipping)') +group.add_argument('--clip-mode', type=str, default='norm', + help='Gradient clipping mode. 
One of ("norm", "value", "agc")') +group.add_argument('--layer-decay', type=float, default=None, + help='layer-wise learning rate decay (default: None)') +group.add_argument('--opt-kwargs', nargs='*', default={}, action=utils.ParseKwargs) + +# Learning rate schedule parameters +group = parser.add_argument_group('Learning rate schedule parameters') +group.add_argument('--sched', type=str, default='cosine', metavar='SCHEDULER', + help='LR scheduler (default: "step"') +group.add_argument('--sched-on-updates', action='store_true', default=False, + help='Apply LR scheduler step on update instead of epoch end.') +group.add_argument('--lr', type=float, default=None, metavar='LR', + help='learning rate, overrides lr-base if set (default: None)') +group.add_argument('--lr-base', type=float, default=0.1, metavar='LR', + help='base learning rate: lr = lr_base * global_batch_size / base_size') +group.add_argument('--lr-base-size', type=int, default=256, metavar='DIV', + help='base learning rate batch size (divisor, default: 256).') +group.add_argument('--lr-base-scale', type=str, default='', metavar='SCALE', + help='base learning rate vs batch_size scaling ("linear", "sqrt", based on opt if empty)') +group.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct', + help='learning rate noise on/off epoch percentages') +group.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT', + help='learning rate noise limit percent (default: 0.67)') +group.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV', + help='learning rate noise std-dev (default: 1.0)') +group.add_argument('--lr-cycle-mul', type=float, default=1.0, metavar='MULT', + help='learning rate cycle len multiplier (default: 1.0)') +group.add_argument('--lr-cycle-decay', type=float, default=0.5, metavar='MULT', + help='amount to decay each learning rate cycle (default: 0.5)') +group.add_argument('--lr-cycle-limit', type=int, default=1, metavar='N', + help='learning rate cycle limit, cycles enabled if > 1') +group.add_argument('--lr-k-decay', type=float, default=1.0, + help='learning rate k-decay for cosine/poly (default: 1.0)') +group.add_argument('--warmup-lr', type=float, default=1e-5, metavar='LR', + help='warmup learning rate (default: 1e-5)') +group.add_argument('--min-lr', type=float, default=0, metavar='LR', + help='lower lr bound for cyclic schedulers that hit 0 (default: 0)') +group.add_argument('--epochs', type=int, default=300, metavar='N', + help='number of epochs to train (default: 300)') +group.add_argument('--epoch-repeats', type=float, default=0., metavar='N', + help='epoch repeat multiplier (number of times to repeat dataset epoch per train epoch).') +group.add_argument('--start-epoch', default=None, type=int, metavar='N', + help='manual epoch number (useful on restarts)') +group.add_argument('--decay-milestones', default=[90, 180, 270], type=int, nargs='+', metavar="MILESTONES", + help='list of decay epoch indices for multistep lr. 
must be increasing') +group.add_argument('--decay-epochs', type=float, default=90, metavar='N', + help='epoch interval to decay LR') +group.add_argument('--warmup-epochs', type=int, default=5, metavar='N', + help='epochs to warmup LR, if scheduler supports') +group.add_argument('--warmup-prefix', action='store_true', default=False, + help='Exclude warmup period from decay schedule.'), +group.add_argument('--cooldown-epochs', type=int, default=0, metavar='N', + help='epochs to cooldown LR at min_lr, after cyclic schedule ends') +group.add_argument('--patience-epochs', type=int, default=10, metavar='N', + help='patience epochs for Plateau LR scheduler (default: 10)') +group.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE', + help='LR decay rate (default: 0.1)') + +# Augmentation & regularization parameters +group = parser.add_argument_group('Augmentation and regularization parameters') +group.add_argument('--no-aug', action='store_true', default=False, + help='Disable all training augmentation, override other train aug args') +group.add_argument('--scale', type=float, nargs='+', default=[0.08, 1.0], metavar='PCT', + help='Random resize scale (default: 0.08 1.0)') +group.add_argument('--ratio', type=float, nargs='+', default=[3. / 4., 4. / 3.], metavar='RATIO', + help='Random resize aspect ratio (default: 0.75 1.33)') +group.add_argument('--hflip', type=float, default=0.5, + help='Horizontal flip training aug probability') +group.add_argument('--vflip', type=float, default=0., + help='Vertical flip training aug probability') +group.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT', + help='Color jitter factor (default: 0.4)') +group.add_argument('--aa', type=str, default=None, metavar='NAME', + help='Use AutoAugment policy. "v0" or "original". (default: None)'), +group.add_argument('--aug-repeats', type=float, default=0, + help='Number of augmentation repetitions (distributed training only) (default: 0)') +group.add_argument('--aug-splits', type=int, default=0, + help='Number of augmentation splits (default: 0, valid: 0 or >=2)') +group.add_argument('--jsd-loss', action='store_true', default=False, + help='Enable Jensen-Shannon Divergence + CE loss. Use with `--aug-splits`.') +group.add_argument('--bce-loss', action='store_true', default=False, + help='Enable BCE loss w/ Mixup/CutMix use.') +group.add_argument('--bce-target-thresh', type=float, default=None, + help='Threshold for binarizing softened BCE targets (default: None, disabled)') +group.add_argument('--reprob', type=float, default=0., metavar='PCT', + help='Random erase prob (default: 0.)') +group.add_argument('--remode', type=str, default='pixel', + help='Random erase mode (default: "pixel")') +group.add_argument('--recount', type=int, default=1, + help='Random erase count (default: 1)') +group.add_argument('--resplit', action='store_true', default=False, + help='Do not random erase first (clean) augmentation split') +group.add_argument('--mixup', type=float, default=0.0, + help='mixup alpha, mixup enabled if > 0. (default: 0.)') +group.add_argument('--cutmix', type=float, default=0.0, + help='cutmix alpha, cutmix enabled if > 0. 
(default: 0.)') +group.add_argument('--cutmix-minmax', type=float, nargs='+', default=None, + help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)') +group.add_argument('--mixup-prob', type=float, default=1.0, + help='Probability of performing mixup or cutmix when either/both is enabled') +group.add_argument('--mixup-switch-prob', type=float, default=0.5, + help='Probability of switching to cutmix when both mixup and cutmix enabled') +group.add_argument('--mixup-mode', type=str, default='batch', + help='How to apply mixup/cutmix params. Per "batch", "pair", or "elem"') +group.add_argument('--mixup-off-epoch', default=0, type=int, metavar='N', + help='Turn off mixup after this epoch, disabled if 0 (default: 0)') +group.add_argument('--smoothing', type=float, default=0.1, + help='Label smoothing (default: 0.1)') +group.add_argument('--train-interpolation', type=str, default='random', + help='Training interpolation (random, bilinear, bicubic default: "random")') +group.add_argument('--drop', type=float, default=0.0, metavar='PCT', + help='Dropout rate (default: 0.)') +group.add_argument('--drop-connect', type=float, default=None, metavar='PCT', + help='Drop connect rate, DEPRECATED, use drop-path (default: None)') +group.add_argument('--drop-path', type=float, default=None, metavar='PCT', + help='Drop path rate (default: None)') +group.add_argument('--drop-block', type=float, default=None, metavar='PCT', + help='Drop block rate (default: None)') + +# Batch norm parameters (only works with gen_efficientnet based models currently) +group = parser.add_argument_group('Batch norm parameters', 'Only works with gen_efficientnet based models currently.') +group.add_argument('--bn-momentum', type=float, default=None, + help='BatchNorm momentum override (if not None)') +group.add_argument('--bn-eps', type=float, default=None, + help='BatchNorm epsilon override (if not None)') +group.add_argument('--sync-bn', action='store_true', + help='Enable NVIDIA Apex or Torch synchronized BatchNorm.') +group.add_argument('--dist-bn', type=str, default='reduce', + help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")') +group.add_argument('--split-bn', action='store_true', + help='Enable separate BN layers per augmentation split.') + +# Model Exponential Moving Average +group = parser.add_argument_group('Model exponential moving average parameters') +group.add_argument('--model-ema', action='store_true', default=False, + help='Enable tracking moving average of model weights') +group.add_argument('--model-ema-force-cpu', action='store_true', default=False, + help='Force ema to be tracked on CPU, rank=0 node only. 
Disables EMA validation.') +group.add_argument('--model-ema-decay', type=float, default=0.9998, + help='decay factor for model weights moving average (default: 0.9998)') + +# Misc +group = parser.add_argument_group('Miscellaneous parameters') +group.add_argument('--seed', type=int, default=42, metavar='S', + help='random seed (default: 42)') +group.add_argument('--worker-seeding', type=str, default='all', + help='worker seed mode (default: all)') +group.add_argument('--log-interval', type=int, default=50, metavar='N', + help='how many batches to wait before logging training status') +group.add_argument('--recovery-interval', type=int, default=0, metavar='N', + help='how many batches to wait before writing recovery checkpoint') +group.add_argument('--checkpoint-hist', type=int, default=10, metavar='N', + help='number of checkpoints to keep (default: 10)') +group.add_argument('-j', '--workers', type=int, default=4, metavar='N', + help='how many training processes to use (default: 4)') +group.add_argument('--save-images', action='store_true', default=False, + help='save images of input bathes every log interval for debugging') +group.add_argument('--amp', action='store_true', default=False, + help='use NVIDIA Apex AMP or Native AMP for mixed precision training') +group.add_argument('--amp-dtype', default='float16', type=str, + help='lower precision AMP dtype (default: float16)') +group.add_argument('--amp-impl', default='native', type=str, + help='AMP impl to use, "native" or "apex" (default: native)') +group.add_argument('--no-ddp-bb', action='store_true', default=False, + help='Force broadcast buffers for native DDP to off.') +group.add_argument('--pin-mem', action='store_true', default=False, + help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.') +group.add_argument('--no-prefetcher', action='store_true', default=False, + help='disable fast prefetcher') +group.add_argument('--output', default='', type=str, metavar='PATH', + help='path to output folder (default: none, current dir)') +group.add_argument('--experiment', default='', type=str, metavar='NAME', + help='name of train experiment, name of sub-folder for output') +group.add_argument('--eval-metric', default='top1', type=str, metavar='EVAL_METRIC', + help='Best metric (default: "top1"') +group.add_argument('--tta', type=int, default=0, metavar='N', + help='Test/inference time augmentation (oversampling) factor. 
0=None (default: 0)')
+group.add_argument("--local_rank", default=0, type=int)
+group.add_argument('--use-multi-epochs-loader', action='store_true', default=False,
+                   help='use the multi-epochs-loader to save time at the beginning of every epoch')
+group.add_argument('--log-wandb', action='store_true', default=False,
+                   help='log training and validation metrics to wandb')
+
+group = parser.add_argument_group('INC pruning/distillation parameters')
+group.add_argument('--do-prune', action='store_true', default=False,
+                   help='do the pruning')
+group.add_argument('--do-distillation', action='store_true', default=False,
+                   help='do distillation')
+group.add_argument('--distillation-loss-weight', type=float, default=1.0,
+                   help='the coefficient of teacher-student distillation loss')
+group.add_argument('--update-frequency-on-step', type=int, default=2000,
+                   help='pruning update frequency, in optimizer steps (default: 2000)')
+group.add_argument('--target-sparsity', type=float, default=0.9,
+                   help='target sparsity ratio for pruning (default: 0.9)')
+group.add_argument('--pruning-pattern', type=str, default="1x1",
+                   help='pruning pattern (default: "1x1")')
+group.add_argument('--no-cuda', action='store_true', default=False,
+                   help='force CPU training even if CUDA is available.')
+group.add_argument('--distributed', action='store_true', default=False,
+                   help='Whether to do distributed training.')
+group.add_argument('--dist_backend', type=str, default="nccl",
+                   help='The backend to be used for distributed training.')
+
+def _parse_args():
+    # Do we have a config file to parse?
+    args_config, remaining = config_parser.parse_known_args()
+    if args_config.config:
+        with open(args_config.config, 'r') as f:
+            cfg = yaml.safe_load(f)
+            parser.set_defaults(**cfg)
+
+    # The main arg parser parses the rest of the args, the usual
+    # defaults will have been overridden if config file specified.
+    args = parser.parse_args(remaining)
+
+    # Cache the args as a text string to save them in the output dir later
+    args_text = yaml.safe_dump(args.__dict__, default_flow_style=False)
+    return args, args_text
+
+
+def main():
+    utils.setup_default_logging()
+    args, args_text = _parse_args()
+
+    if torch.cuda.is_available():
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.benchmark = True
+
+    args.prefetcher = not args.no_prefetcher
+    device = utils.init_distributed_device(args)
+
+    # user-defined args.no_cuda: even if the machine has a GPU, the user can still choose to train on CPU.
+    if args.no_cuda:
+        device = torch.device("cpu")
+    if torch.cuda.is_available() and device.type == "cpu":
+        _logger.warning("Your device has CUDA enabled, but the model will be pruned on CPU.")
+        device = torch.device("cpu")
+
+    if args.distributed:
+        _logger.info(
+            'Training in distributed mode with multiple processes, 1 device per process. '
+            f'Process {args.rank}, total {args.world_size}, device {args.device}.')
+    else:
+        _logger.info(f'Training with a single process on 1 device ({args.device}).')
+    assert args.rank >= 0
+
+    # resolve AMP arguments based on PyTorch / Apex availability
+    use_amp = None
+    amp_dtype = torch.float16
+    if args.amp:
+        if args.amp_impl == 'apex':
+            assert has_apex, 'AMP impl specified as APEX but APEX is not installed.'
+            use_amp = 'apex'
+            assert args.amp_dtype == 'float16'
+        else:
+            assert has_native_amp, 'Please update PyTorch to a version with native AMP (or use APEX).'
+ use_amp = 'native' + assert args.amp_dtype in ('float16', 'bfloat16') + if args.amp_dtype == 'bfloat16': + amp_dtype = torch.bfloat16 + + utils.random_seed(args.seed, args.rank) + + if args.fuser: + utils.set_jit_fuser(args.fuser) + if args.fast_norm: + set_fast_norm() + + in_chans = 3 + if args.in_chans is not None: + in_chans = args.in_chans + elif args.input_size is not None: + in_chans = args.input_size[0] + + model = create_model( + args.model, + pretrained=args.pretrained, + in_chans=in_chans, + num_classes=args.num_classes, + drop_rate=args.drop, + drop_path_rate=args.drop_path, + drop_block_rate=args.drop_block, + global_pool=args.gp, + bn_momentum=args.bn_momentum, + bn_eps=args.bn_eps, + scriptable=args.torchscript, + checkpoint_path=args.initial_checkpoint, + **args.model_kwargs, + ) + # add teacher model for distillation: + if args.do_distillation: + teacher_model = create_model( + args.model, + pretrained=args.pretrained, + in_chans=in_chans, + num_classes=args.num_classes, + drop_rate=args.drop, + drop_path_rate=args.drop_path, + drop_block_rate=args.drop_block, + global_pool=args.gp, + bn_momentum=args.bn_momentum, + bn_eps=args.bn_eps, + scriptable=args.torchscript, + checkpoint_path=args.initial_checkpoint, + ) + + if args.num_classes is None: + assert hasattr(model, 'num_classes'), 'Model must have `num_classes` attr if not set on cmd line/config.' + args.num_classes = model.num_classes # FIXME handle model default vs config num_classes more elegantly + + if args.grad_checkpointing: + model.set_grad_checkpointing(enable=True) + + if utils.is_primary(args): + _logger.info( + f'Model {safe_model_name(args.model)} created, param count:{sum([m.numel() for m in model.parameters()])}') + + data_config = resolve_data_config(vars(args), model=model, verbose=utils.is_primary(args)) + + # setup augmentation batch splits for contrastive loss or split bn + num_aug_splits = 0 + if args.aug_splits > 0: + assert args.aug_splits > 1, 'A split of 1 makes no sense' + num_aug_splits = args.aug_splits + + # enable split bn (separate bn stats per batch-portion) + if args.split_bn: + assert num_aug_splits > 1 or args.resplit + model = convert_splitbn_model(model, max(num_aug_splits, 2)) + + # move model to GPU, enable channels last layout if set + model.to(device=device) + if args.do_distillation: teacher_model.to(device=device) + + if args.channels_last: + model.to(memory_format=torch.channels_last) + + # setup synchronized BatchNorm for distributed training + if args.distributed and args.sync_bn: + args.dist_bn = '' # disable dist_bn when sync BN active + assert not args.split_bn + if has_apex and use_amp == 'apex': + # Apex SyncBN used with Apex AMP + # WARNING this won't currently work with models using BatchNormAct2d + model = convert_syncbn_model(model) + else: + model = convert_sync_batchnorm(model) + if utils.is_primary(args): + _logger.info( + 'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using ' + 'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.') + + if args.torchscript: + assert not use_amp == 'apex', 'Cannot use APEX AMP with torchscripted model' + assert not args.sync_bn, 'Cannot use SyncBatchNorm with torchscripted model' + model = torch.jit.script(model) + elif args.torchcompile: + # FIXME dynamo might need move below DDP wrapping? TBD + assert has_compile, 'A version of torch w/ torch.compile() is required for --compile, possibly a nightly.' 
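+ # torch._dynamo.reset() clears any previously compiled graphs before compiling with the requested backend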
+ torch._dynamo.reset() + model = torch.compile(model, backend=args.torchcompile) + elif args.aot_autograd: + assert has_functorch, "functorch is needed for --aot-autograd" + model = memory_efficient_fusion(model) + + if not args.lr: + global_batch_size = args.batch_size * args.world_size + batch_ratio = global_batch_size / args.lr_base_size + if not args.lr_base_scale: + on = args.opt.lower() + args.lr_base_scale = 'sqrt' if any([o in on for o in ('ada', 'lamb')]) else 'linear' + if args.lr_base_scale == 'sqrt': + batch_ratio = batch_ratio ** 0.5 + args.lr = args.lr_base * batch_ratio + if utils.is_primary(args): + _logger.info( + f'Learning rate ({args.lr}) calculated from base learning rate ({args.lr_base}) ' + f'and global batch size ({global_batch_size}) with {args.lr_base_scale} scaling.') + + optimizer = create_optimizer_v2( + model, + **optimizer_kwargs(cfg=args), + **args.opt_kwargs, + ) + + # setup automatic mixed-precision (AMP) loss scaling and op casting + amp_autocast = suppress # do nothing + loss_scaler = None + if use_amp == 'apex': + assert device.type == 'cuda' + model, optimizer = amp.initialize(model, optimizer, opt_level='O1') + loss_scaler = ApexScaler() + if utils.is_primary(args): + _logger.info('Using NVIDIA APEX AMP. Training in mixed precision.') + elif use_amp == 'native': + amp_autocast = partial(torch.autocast, device_type=device.type, dtype=amp_dtype) + if device.type == 'cuda': + loss_scaler = NativeScaler() + if utils.is_primary(args): + _logger.info('Using native Torch AMP. Training in mixed precision.') + else: + if utils.is_primary(args): + _logger.info('AMP not enabled. Training in float32.') + + # optionally resume from a checkpoint + resume_epoch = None + if args.resume: + resume_epoch = resume_checkpoint( + model, + args.resume, + optimizer=None if args.no_resume_opt else optimizer, + loss_scaler=None if args.no_resume_opt else loss_scaler, + log_info=utils.is_primary(args), + ) + + # setup exponential moving average of model weights, SWA could be used here too + model_ema = None + if args.model_ema: + # Important to create EMA model after cuda(), DP wrapper, and AMP but before DDP wrapper + model_ema = utils.ModelEmaV2( + model, decay=args.model_ema_decay, device='cpu' if args.model_ema_force_cpu else None) + if args.resume: + load_checkpoint(model_ema.module, args.resume, use_ema=True) + + # setup distributed training + if args.distributed: + if has_apex and use_amp == 'apex': + # Apex DDP preferred unless native amp is activated + if utils.is_primary(args): + _logger.info("Using NVIDIA APEX DistributedDataParallel.") + model = ApexDDP(model, delay_allreduce=True) + else: + if utils.is_primary(args): + _logger.info("Using native Torch DistributedDataParallel.") + model = NativeDDP(model, device_ids=None if args.no_cuda else [device], broadcast_buffers=not args.no_ddp_bb) + # NOTE: EMA model does not need to be wrapped by DDP + + # create the train and eval datasets + if args.data and not args.data_dir: + args.data_dir = args.data + dataset_train = create_dataset( + args.dataset, + root=args.data_dir, + split=args.train_split, + is_training=True, + class_map=args.class_map, + download=args.dataset_download, + batch_size=args.batch_size, + seed=args.seed, + repeats=args.epoch_repeats, + ) + + dataset_eval = create_dataset( + args.dataset, + root=args.data_dir, + split=args.val_split, + is_training=False, + class_map=args.class_map, + download=args.dataset_download, + batch_size=args.batch_size, + ) + + # setup mixup / cutmix + collate_fn = None + 
mixup_fn = None + mixup_active = args.mixup > 0 or args.cutmix > 0. or args.cutmix_minmax is not None + if mixup_active: + mixup_args = dict( + mixup_alpha=args.mixup, + cutmix_alpha=args.cutmix, + cutmix_minmax=args.cutmix_minmax, + prob=args.mixup_prob, + switch_prob=args.mixup_switch_prob, + mode=args.mixup_mode, + label_smoothing=args.smoothing, + num_classes=args.num_classes + ) + if args.prefetcher: + assert not num_aug_splits # collate conflict (need to support deinterleaving in collate mixup) + collate_fn = FastCollateMixup(**mixup_args) + else: + mixup_fn = Mixup(**mixup_args) + + # wrap dataset in AugMix helper + if num_aug_splits > 1: + dataset_train = AugMixDataset(dataset_train, num_splits=num_aug_splits) + + # create data loaders w/ augmentation pipeline + train_interpolation = args.train_interpolation + if args.no_aug or not train_interpolation: + train_interpolation = data_config['interpolation'] + loader_train = create_loader( + dataset_train, + input_size=data_config['input_size'], + batch_size=args.batch_size, + is_training=True, + use_prefetcher=args.prefetcher, + no_aug=args.no_aug, + re_prob=args.reprob, + re_mode=args.remode, + re_count=args.recount, + re_split=args.resplit, + scale=args.scale, + ratio=args.ratio, + hflip=args.hflip, + vflip=args.vflip, + color_jitter=args.color_jitter, + auto_augment=args.aa, + num_aug_repeats=args.aug_repeats, + num_aug_splits=num_aug_splits, + interpolation=train_interpolation, + mean=data_config['mean'], + std=data_config['std'], + num_workers=args.workers, + distributed=args.distributed, + collate_fn=collate_fn, + pin_memory=args.pin_mem, + device=device, + use_multi_epochs_loader=args.use_multi_epochs_loader, + worker_seeding=args.worker_seeding, + ) + + eval_workers = args.workers + if args.distributed and ('tfds' in args.dataset or 'wds' in args.dataset): + # FIXME reduces validation padding issues when using TFDS, WDS w/ workers and distributed training + eval_workers = min(2, args.workers) + loader_eval = create_loader( + dataset_eval, + input_size=data_config['input_size'], + batch_size=args.validation_batch_size or args.batch_size, + is_training=False, + use_prefetcher=args.prefetcher, + interpolation=data_config['interpolation'], + mean=data_config['mean'], + std=data_config['std'], + num_workers=eval_workers, + distributed=args.distributed, + crop_pct=data_config['crop_pct'], + pin_memory=args.pin_mem, + device=device, + ) + + # setup loss function + if args.jsd_loss: + assert num_aug_splits > 1 # JSD only valid with aug splits set + train_loss_fn = JsdCrossEntropy(num_splits=num_aug_splits, smoothing=args.smoothing) + elif mixup_active: + # smoothing is handled with mixup target transform which outputs sparse, soft targets + if args.bce_loss: + train_loss_fn = BinaryCrossEntropy(target_threshold=args.bce_target_thresh) + else: + train_loss_fn = SoftTargetCrossEntropy() + elif args.smoothing: + if args.bce_loss: + train_loss_fn = BinaryCrossEntropy(smoothing=args.smoothing, target_threshold=args.bce_target_thresh) + else: + train_loss_fn = LabelSmoothingCrossEntropy(smoothing=args.smoothing) + else: + train_loss_fn = nn.CrossEntropyLoss() + train_loss_fn = train_loss_fn.to(device=device) + validate_loss_fn = nn.CrossEntropyLoss().to(device=device) + + # setup checkpoint saver and eval metric tracking + eval_metric = args.eval_metric + best_metric = None + best_epoch = None + saver = None + output_dir = None + if utils.is_primary(args): + if args.experiment: + exp_name = args.experiment + else: + exp_name = '-'.join([ + 
datetime.now().strftime("%Y%m%d-%H%M%S"), + safe_model_name(args.model), + str(data_config['input_size'][-1]) + ]) + output_dir = utils.get_outdir(args.output if args.output else './output/train', exp_name) + decreasing = True if eval_metric == 'loss' else False + saver = utils.CheckpointSaver( + model=model, + optimizer=optimizer, + args=args, + model_ema=model_ema, + amp_scaler=loss_scaler, + checkpoint_dir=output_dir, + recovery_dir=output_dir, + decreasing=decreasing, + max_history=args.checkpoint_hist + ) + with open(os.path.join(output_dir, 'args.yaml'), 'w') as f: + f.write(args_text) + + if utils.is_primary(args) and args.log_wandb: + if has_wandb: + wandb.init(project=args.experiment, config=args) + else: + _logger.warning( + "You've requested to log metrics to wandb but package not found. " + "Metrics not being logged to wandb, try `pip install wandb`") + + # setup learning rate schedule and starting epoch + updates_per_epoch = len(loader_train) + lr_scheduler, num_epochs = create_scheduler_v2( + optimizer, + **scheduler_kwargs(args), + updates_per_epoch=updates_per_epoch, + ) + start_epoch = 0 + if args.start_epoch is not None: + # a specified start_epoch will always override the resume epoch + start_epoch = args.start_epoch + elif resume_epoch is not None: + start_epoch = resume_epoch + if lr_scheduler is not None and start_epoch > 0: + if args.sched_on_updates: + lr_scheduler.step_update(start_epoch * updates_per_epoch) + else: + lr_scheduler.step(start_epoch) + + if utils.is_primary(args): + _logger.info( + f'Scheduled epochs: {num_epochs}. LR stepped per {"epoch" if lr_scheduler.t_in_epochs else "update"}.') + + # ***INC Pruning API Preparation*** + + # Step 1: Load the INC packages + from neural_compressor.training import prepare_compression + from neural_compressor.training import WeightPruningConfig + # Step 2: epochs <==> steps conversion, use steps to initialize the pruning object + num_iterations = len(loader_train) + num_warmup_steps = int(args.warmup_epochs * num_iterations) + # Step 3: args.warmup_epochs, args.cooldown_epochs => pruner.start(end)_steps + if args.do_prune: + start_step = num_warmup_steps + end_step = (num_epochs - args.cooldown_epochs) * num_iterations + else: + # set start_step/end_step to values the pruner will never reach, so pruning stays disabled + start_step = int(num_iterations * (args.epochs + 1)) + end_step = start_step + # Step 4: Define the pruning config + pruning_configs = [ + { + "pruning_type": "snip_momentum", + "pruning_scope": "global", + "sparsity_decay_type": "exp", + "op_names": ["layer"], + "excluded_op_names": ["head", "fc", ".downsample"], # in total, 48 conv layers will be pruned + "pruning_op_types": ["Conv2d"], + "max_sparsity_ratio_per_op": 0.98 + } + ] + configs = WeightPruningConfig( + pruning_configs, + target_sparsity=args.target_sparsity, + pattern=args.pruning_pattern, + pruning_frequency=args.update_frequency_on_step, + start_step=start_step, + end_step=end_step + ) + # Step 5: Create the INC compression manager for pruning + compression_manager = prepare_compression(model=model, confs=configs) + compression_manager.callbacks.on_train_begin() + model = compression_manager.model + + _logger.info("Before pruning, evaluate the model's baseline validation performance") + eval_metrics = validate( + model, + loader_eval, + validate_loss_fn, + args, + device=device, + amp_autocast=amp_autocast, + ) + _logger.info(f"Model accuracy before pruning is {eval_metrics}") + + try: + for epoch in range(start_epoch, num_epochs): + if hasattr(dataset_train, 'set_epoch'): + 
dataset_train.set_epoch(epoch) + elif args.distributed and hasattr(loader_train.sampler, 'set_epoch'): + loader_train.sampler.set_epoch(epoch) + + train_metrics = train_one_epoch( + epoch, + model, + teacher_model, + loader_train, + optimizer, + train_loss_fn, + args, + device=device, + lr_scheduler=lr_scheduler, + saver=saver, + output_dir=output_dir, + amp_autocast=amp_autocast, + loss_scaler=loss_scaler, + model_ema=model_ema, + mixup_fn=mixup_fn, + compression_manager=compression_manager, + ) + + if args.distributed and args.dist_bn in ('broadcast', 'reduce'): + if utils.is_primary(args): + _logger.info("Distributing BatchNorm running means and vars") + utils.distribute_bn(model, args.world_size, args.dist_bn == 'reduce') + + eval_metrics = validate( + model, + loader_eval, + validate_loss_fn, + args, + device=device, + amp_autocast=amp_autocast, + ) + + if model_ema is not None and not args.model_ema_force_cpu: + if args.distributed and args.dist_bn in ('broadcast', 'reduce'): + utils.distribute_bn(model_ema, args.world_size, args.dist_bn == 'reduce') + + ema_eval_metrics = validate( + model_ema.module, + loader_eval, + validate_loss_fn, + args, + device=device, + amp_autocast=amp_autocast, + log_suffix=' (EMA)', + ) + eval_metrics = ema_eval_metrics + + if output_dir is not None: + lrs = [param_group['lr'] for param_group in optimizer.param_groups] + utils.update_summary( + epoch, + train_metrics, + eval_metrics, + filename=os.path.join(output_dir, 'summary.csv'), + lr=sum(lrs) / len(lrs), + write_header=best_metric is None, + log_wandb=args.log_wandb and has_wandb, + ) + + if saver is not None: + # save proper checkpoint with eval metric + save_metric = eval_metrics[eval_metric] + best_metric, best_epoch = saver.save_checkpoint(epoch, metric=save_metric) + + if lr_scheduler is not None: + # step LR for next epoch + lr_scheduler.step(epoch + 1, eval_metrics[eval_metric]) + + except KeyboardInterrupt: + pass + + if best_metric is not None: + _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch)) + + +def train_one_epoch( + epoch, + model, + teacher_model, + loader, + optimizer, + loss_fn, + args, + device=None, + lr_scheduler=None, + saver=None, + output_dir=None, + amp_autocast=suppress, + loss_scaler=None, + model_ema=None, + mixup_fn=None, + compression_manager=None, +): + if args.mixup_off_epoch and epoch >= args.mixup_off_epoch: + if args.prefetcher and loader.mixup_enabled: + loader.mixup_enabled = False + elif mixup_fn is not None: + mixup_fn.mixup_enabled = False + + second_order = hasattr(optimizer, 'is_second_order') and optimizer.is_second_order + batch_time_m = utils.AverageMeter() + data_time_m = utils.AverageMeter() + losses_m = utils.AverageMeter() + + model.train() + + end = time.time() + num_batches_per_epoch = len(loader) + last_idx = num_batches_per_epoch - 1 + num_updates = epoch * num_batches_per_epoch + for batch_idx, (input, target) in enumerate(loader): + compression_manager.callbacks.on_step_begin(batch_id = batch_idx) + last_batch = batch_idx == last_idx + data_time_m.update(time.time() - end) + if not args.prefetcher: + input, target = input.to(device), target.to(device) + if mixup_fn is not None: + input, target = mixup_fn(input, target) + if args.channels_last: + input = input.contiguous(memory_format=torch.channels_last) + + with amp_autocast(): + output = model(input) + loss = loss_fn(output, target) + + # use distillation + if args.do_distillation: + with torch.no_grad(): + teacher_output = teacher_model(input) + 
distillation_loss = args.distillation_loss_weight * get_loss_one_logit(output, teacher_output) + loss += distillation_loss + + if not args.distributed: + losses_m.update(loss.item(), input.size(0)) + + optimizer.zero_grad() + if loss_scaler is not None: + loss_scaler( + loss, optimizer, + clip_grad=args.clip_grad, + clip_mode=args.clip_mode, + parameters=model_parameters(model, exclude_head='agc' in args.clip_mode), + create_graph=second_order + ) + else: + loss.backward(create_graph=second_order) + if args.clip_grad is not None: + utils.dispatch_clip_grad( + model_parameters(model, exclude_head='agc' in args.clip_mode), + value=args.clip_grad, + mode=args.clip_mode + ) + compression_manager.callbacks.on_before_optimizer_step() + optimizer.step() + compression_manager.callbacks.on_after_optimizer_step() + + if model_ema is not None: + model_ema.update(model) + + if device.type == 'cuda': + torch.cuda.synchronize() + + num_updates += 1 + batch_time_m.update(time.time() - end) + if last_batch or batch_idx % args.log_interval == 0: + lrl = [param_group['lr'] for param_group in optimizer.param_groups] + lr = sum(lrl) / len(lrl) + + if args.distributed: + reduced_loss = utils.reduce_tensor(loss.data, args.world_size) + losses_m.update(reduced_loss.item(), input.size(0)) + + if utils.is_primary(args): + _logger.info( + 'Train: {} [{:>4d}/{} ({:>3.0f}%)] ' + 'Loss: {loss.val:#.4g} ({loss.avg:#.3g}) ' + 'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s ' + '({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' + 'LR: {lr:.3e} ' + 'Data: {data_time.val:.3f} ({data_time.avg:.3f})'.format( + epoch, + batch_idx, len(loader), + 100. * batch_idx / last_idx, + loss=losses_m, + batch_time=batch_time_m, + rate=input.size(0) * args.world_size / batch_time_m.val, + rate_avg=input.size(0) * args.world_size / batch_time_m.avg, + lr=lr, + data_time=data_time_m) + ) + + if args.save_images and output_dir: + torchvision.utils.save_image( + input, + os.path.join(output_dir, 'train-batch-%d.jpg' % batch_idx), + padding=0, + normalize=True + ) + + if saver is not None and args.recovery_interval and ( + last_batch or (batch_idx + 1) % args.recovery_interval == 0): + saver.save_recovery(epoch, batch_idx=batch_idx) + + if lr_scheduler is not None: + lr_scheduler.step_update(num_updates=num_updates, metric=losses_m.avg) + + end = time.time() + # end for + + if hasattr(optimizer, 'sync_lookahead'): + optimizer.sync_lookahead() + + return OrderedDict([('loss', losses_m.avg)]) + + +def validate( + model, + loader, + loss_fn, + args, + device=None, + amp_autocast=suppress, + log_suffix='' +): + batch_time_m = utils.AverageMeter() + losses_m = utils.AverageMeter() + top1_m = utils.AverageMeter() + top5_m = utils.AverageMeter() + + model.eval() + + end = time.time() + last_idx = len(loader) - 1 + with torch.no_grad(): + for batch_idx, (input, target) in enumerate(loader): + last_batch = batch_idx == last_idx + if not args.prefetcher: + input = input.to(device) + target = target.to(device) + if args.channels_last: + input = input.contiguous(memory_format=torch.channels_last) + + with amp_autocast(): + output = model(input) + if isinstance(output, (tuple, list)): + output = output[0] + + # augmentation reduction + reduce_factor = args.tta + if reduce_factor > 1: + output = output.unfold(0, reduce_factor, reduce_factor).mean(dim=2) + target = target[0:target.size(0):reduce_factor] + + loss = loss_fn(output, target) + acc1, acc5 = utils.accuracy(output, target, topk=(1, 5)) + + if args.distributed: + reduced_loss = 
utils.reduce_tensor(loss.data, args.world_size) + acc1 = utils.reduce_tensor(acc1, args.world_size) + acc5 = utils.reduce_tensor(acc5, args.world_size) + else: + reduced_loss = loss.data + + if device.type == 'cuda': + torch.cuda.synchronize() + + losses_m.update(reduced_loss.item(), input.size(0)) + top1_m.update(acc1.item(), output.size(0)) + top5_m.update(acc5.item(), output.size(0)) + + batch_time_m.update(time.time() - end) + end = time.time() + if utils.is_primary(args) and (last_batch or batch_idx % args.log_interval == 0): + log_name = 'Test' + log_suffix + _logger.info( + '{0}: [{1:>4d}/{2}] ' + 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) ' + 'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) ' + 'Acc@1: {top1.val:>7.4f} ({top1.avg:>7.4f}) ' + 'Acc@5: {top5.val:>7.4f} ({top5.avg:>7.4f})'.format( + log_name, batch_idx, last_idx, + batch_time=batch_time_m, + loss=losses_m, + top1=top1_m, + top5=top5_m) + ) + + metrics = OrderedDict([('loss', losses_m.avg), ('top1', top1_m.avg), ('top5', top5_m.avg)]) + + return metrics + + +if __name__ == '__main__': + main() diff --git a/examples/pytorch/image_recognition/VGG-8/distillation/eager/README.md b/examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/README.md similarity index 100% rename from examples/pytorch/image_recognition/VGG-8/distillation/eager/README.md rename to examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/README.md diff --git a/examples/pytorch/image_recognition/VGG-8/distillation/eager/main.py b/examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/main.py similarity index 100% rename from examples/pytorch/image_recognition/VGG-8/distillation/eager/main.py rename to examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/main.py diff --git a/examples/pytorch/image_recognition/VGG-8/distillation/eager/plain_cnn_cifar.py b/examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/plain_cnn_cifar.py similarity index 100% rename from examples/pytorch/image_recognition/VGG-8/distillation/eager/plain_cnn_cifar.py rename to examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/plain_cnn_cifar.py diff --git a/examples/pytorch/image_recognition/VGG-8/distillation/eager/requirements.txt b/examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/VGG-8/distillation/eager/requirements.txt rename to examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/requirements.txt diff --git a/examples/pytorch/image_recognition/VGG-8/distillation/eager/train_without_distillation.py b/examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/train_without_distillation.py similarity index 100% rename from examples/pytorch/image_recognition/VGG-8/distillation/eager/train_without_distillation.py rename to examples/deprecated/pytorch/image_recognition/VGG-8/distillation/eager/train_without_distillation.py diff --git a/examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/LICENSE b/examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/LICENSE similarity index 100% rename from examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/LICENSE rename to examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/LICENSE diff --git a/examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/PeleeNet_README.md 
b/examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/PeleeNet_README.md similarity index 100% rename from examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/PeleeNet_README.md rename to examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/PeleeNet_README.md diff --git a/examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/README.md b/examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/README.md similarity index 100% rename from examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/README.md rename to examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/README.md diff --git a/examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/main.py b/examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/main.py similarity index 100% rename from examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/main.py rename to examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/main.py diff --git a/examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/peleenet.py b/examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/peleenet.py similarity index 100% rename from examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/peleenet.py rename to examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/peleenet.py diff --git a/examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/requirements.txt b/examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/requirements.txt rename to examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/requirements.txt diff --git a/examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/run_benchmark.sh diff --git a/examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/run_quant.sh b/examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/run_quant.sh similarity index 100% rename from examples/pytorch/image_recognition/peleenet/quantization/ptq/fx/run_quant.sh rename to examples/deprecated/pytorch/image_recognition/peleenet/quantization/ptq/fx/run_quant.sh diff --git a/examples/pytorch/image_recognition/resnest/quantization/ptq/fx/LICENSE b/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/LICENSE similarity index 100% rename from examples/pytorch/image_recognition/resnest/quantization/ptq/fx/LICENSE rename to examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/LICENSE diff --git a/examples/pytorch/image_recognition/resnest/quantization/ptq/fx/README.md b/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/README.md similarity index 100% rename from examples/pytorch/image_recognition/resnest/quantization/ptq/fx/README.md rename to examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/README.md diff --git a/examples/pytorch/image_recognition/resnest/quantization/ptq/fx/ResNest_README.md 
b/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/ResNest_README.md similarity index 100% rename from examples/pytorch/image_recognition/resnest/quantization/ptq/fx/ResNest_README.md rename to examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/ResNest_README.md diff --git a/examples/pytorch/image_recognition/resnest/quantization/ptq/fx/requirements.txt b/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/resnest/quantization/ptq/fx/requirements.txt rename to examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/requirements.txt diff --git a/examples/pytorch/image_recognition/resnest/quantization/ptq/fx/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/resnest/quantization/ptq/fx/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/run_benchmark.sh diff --git a/examples/pytorch/image_recognition/resnest/quantization/ptq/fx/run_quant.sh b/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/run_quant.sh similarity index 100% rename from examples/pytorch/image_recognition/resnest/quantization/ptq/fx/run_quant.sh rename to examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/run_quant.sh diff --git a/examples/pytorch/image_recognition/resnest/quantization/ptq/fx/verify.py b/examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/verify.py similarity index 100% rename from examples/pytorch/image_recognition/resnest/quantization/ptq/fx/verify.py rename to examples/deprecated/pytorch/image_recognition/resnest/quantization/ptq/fx/verify.py diff --git a/examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/LICENSE.txt b/examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/LICENSE.txt similarity index 100% rename from examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/LICENSE.txt rename to examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/LICENSE.txt diff --git a/examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/README.md b/examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/README.md similarity index 100% rename from examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/README.md rename to examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/README.md diff --git a/examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/requirements.txt b/examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/requirements.txt rename to examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/requirements.txt diff --git a/examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_benchmark.sh diff --git 
a/examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_eval.py b/examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_eval.py similarity index 100% rename from examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_eval.py rename to examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_eval.py diff --git a/examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_quant.sh b/examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_quant.sh similarity index 100% rename from examples/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_quant.sh rename to examples/deprecated/pytorch/image_recognition/se_resnext/quantization/ptq/fx/run_quant.sh diff --git a/examples/pytorch/image_recognition/segment_anything/README.md b/examples/deprecated/pytorch/image_recognition/segment_anything/README.md similarity index 100% rename from examples/pytorch/image_recognition/segment_anything/README.md rename to examples/deprecated/pytorch/image_recognition/segment_anything/README.md diff --git a/examples/pytorch/image_recognition/segment_anything/download_dataset.py b/examples/deprecated/pytorch/image_recognition/segment_anything/download_dataset.py similarity index 100% rename from examples/pytorch/image_recognition/segment_anything/download_dataset.py rename to examples/deprecated/pytorch/image_recognition/segment_anything/download_dataset.py diff --git a/examples/pytorch/image_recognition/segment_anything/inc_dataset_loader.py b/examples/deprecated/pytorch/image_recognition/segment_anything/inc_dataset_loader.py similarity index 100% rename from examples/pytorch/image_recognition/segment_anything/inc_dataset_loader.py rename to examples/deprecated/pytorch/image_recognition/segment_anything/inc_dataset_loader.py diff --git a/examples/pytorch/image_recognition/segment_anything/main.py b/examples/deprecated/pytorch/image_recognition/segment_anything/main.py similarity index 100% rename from examples/pytorch/image_recognition/segment_anything/main.py rename to examples/deprecated/pytorch/image_recognition/segment_anything/main.py diff --git a/examples/pytorch/image_recognition/segment_anything/requirements.txt b/examples/deprecated/pytorch/image_recognition/segment_anything/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/segment_anything/requirements.txt rename to examples/deprecated/pytorch/image_recognition/segment_anything/requirements.txt diff --git a/examples/pytorch/image_recognition/segment_anything/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/segment_anything/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/segment_anything/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/segment_anything/run_benchmark.sh diff --git a/examples/pytorch/image_recognition/segment_anything/run_quant.sh b/examples/deprecated/pytorch/image_recognition/segment_anything/run_quant.sh similarity index 100% rename from examples/pytorch/image_recognition/segment_anything/run_quant.sh rename to examples/deprecated/pytorch/image_recognition/segment_anything/run_quant.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/distillation/eager/README.md b/examples/deprecated/pytorch/image_recognition/torchvision_models/distillation/eager/README.md similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/distillation/eager/README.md rename to 
examples/deprecated/pytorch/image_recognition/torchvision_models/distillation/eager/README.md diff --git a/examples/pytorch/image_recognition/torchvision_models/distillation/eager/main.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/distillation/eager/main.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/distillation/eager/main.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/distillation/eager/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/distillation/eager/requirements.txt b/examples/deprecated/pytorch/image_recognition/torchvision_models/distillation/eager/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/distillation/eager/requirements.txt rename to examples/deprecated/pytorch/image_recognition/torchvision_models/distillation/eager/requirements.txt diff --git a/examples/pytorch/image_recognition/torchvision_models/distillation/eager/run_distillation.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/distillation/eager/run_distillation.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/distillation/eager/run_distillation.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/distillation/eager/run_distillation.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/export/fx/README.md b/examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/README.md similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/export/fx/README.md rename to examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/README.md diff --git a/examples/pytorch/image_recognition/torchvision_models/export/fx/main.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/main.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/export/fx/main.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/export/fx/onnx_evaluation.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/onnx_evaluation.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/export/fx/onnx_evaluation.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/onnx_evaluation.py diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/requirements.txt b/examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/requirements.txt rename to examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/requirements.txt diff --git a/examples/pytorch/image_recognition/torchvision_models/export/fx/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/export/fx/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/run_benchmark.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/export/fx/run_export.sh 
b/examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/run_export.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/export/fx/run_export.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/export/fx/run_export.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/README.md b/examples/deprecated/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/README.md similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/README.md rename to examples/deprecated/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/README.md diff --git a/examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/main.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/main.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/main.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/main.py diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/requirements.txt b/examples/deprecated/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/cv/mixed_precision/requirements.txt rename to examples/deprecated/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/requirements.txt diff --git a/examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/run_benchmark.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/README.md b/examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/README.md similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/README.md rename to examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/README.md diff --git a/examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/main.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/main.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/main.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/requirements.txt b/examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/requirements.txt rename to examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/prune_and_ptq/fx/requirements.txt diff --git 
a/examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/README.md b/examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/README.md similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/README.md rename to examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/README.md diff --git a/examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/main.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/main.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/main.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/export/fx/requirements.txt b/examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/export/fx/requirements.txt rename to examples/deprecated/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/requirements.txt diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md similarity index 99% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md index 2de0d3b3c3c..910b3657599 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md +++ b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/README.md @@ -163,4 +163,4 @@ quantized_model = load(os.path.abspath(os.path.expanduser(args.tuned_checkpoint) ``` Here, `dataloader` is used to get example_inputs for `torch.fx` to trace the model. You can also pass in `example_inputs` instead. For torch version < 1.13.0, you can ignore this parameter. -Please refer to [Sample code](./main.py). +Please refer to [Sample code](main.py). 
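The FX README hunk above notes that `dataloader` is only used to derive `example_inputs` for `torch.fx` to trace the model, and that `example_inputs` can be passed directly instead. Below is a minimal sketch of that alternative, assuming the `load` helper is imported from `neural_compressor.utils.pytorch` (the import is not shown in the hunk) and accepts an `example_inputs` keyword as the README describes; the ResNet-18 model, the input shape, and the `./saved_results` checkpoint directory are hypothetical placeholders, not values taken from this patch.

```python
import os

import torch
import torchvision.models as models

# Assumed import path for the INC load helper; verify against your installed version.
from neural_compressor.utils.pytorch import load

# Hypothetical FP32 baseline model and tracing input (placeholders, not from this patch).
fp32_model = models.resnet18(weights=None)
example_inputs = torch.randn(1, 3, 224, 224)

# Load the tuned checkpoint; example_inputs replaces dataloader for torch.fx tracing.
# Per the README above, this parameter can be omitted for torch versions < 1.13.0.
quantized_model = load(
    os.path.abspath(os.path.expanduser("./saved_results")),  # hypothetical tuned_checkpoint dir
    fp32_model,
    example_inputs=example_inputs,
)
```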
diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/requirements.txt b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/requirements.txt rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/requirements.txt diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_benchmark.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_quant.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_quant.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_quant.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/fx/run_quant.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md similarity index 98% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md index 0d00e8cd8df..2b6f2b1cd98 100644 --- a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md +++ b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/README.md @@ -127,4 +127,4 @@ quantized_model = load(os.path.abspath(os.path.expanduser(args.tuned_checkpoint) dataloader=val_loader) ``` -Please refer to [Sample code](./main.py). +Please refer to [Sample code](main.py). 
diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/main.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/main.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/main.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/patch.patch b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/patch.patch similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/patch.patch rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/patch.patch diff --git a/examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/requirements.txt b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/optimization_pipeline/qat_during_prune/fx/requirements.txt rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/requirements.txt diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_quant.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_quant.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/run_quant.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/README.md b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/README.md similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/README.md rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/README.md diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/main.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/main.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/main.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/requirements.txt b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/mixed_precision/resnet18/requirements.txt rename to 
examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/requirements.txt diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/run_benchmark.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/run_benchmark.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/run_benchmark.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/run_quant.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/run_quant.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/run_quant.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/quantization/qat/fx/run_quant.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/README.md b/examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/README.md similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/README.md rename to examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/README.md diff --git a/examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/autoaugment.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/autoaugment.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/autoaugment.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/autoaugment.py diff --git a/examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/cutout.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/cutout.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/cutout.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/cutout.py diff --git a/examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/main.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/main.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/main.py rename to examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/requirements.txt b/examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/requirements.txt rename to examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/requirements.txt diff --git a/examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/resnet.py b/examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/resnet.py similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/resnet.py rename to 
examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/resnet.py diff --git a/examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/run_distillation.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/run_distillation.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/run_distillation.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/run_distillation.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/run_selfteacher.sh b/examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/run_selfteacher.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/self_distillation/eager/run_selfteacher.sh rename to examples/deprecated/pytorch/image_recognition/torchvision_models/self_distillation/eager/run_selfteacher.sh diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/.gitignore b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/.gitignore similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/.gitignore rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/.gitignore diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/LICENSE b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/LICENSE similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/LICENSE rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/LICENSE diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/README.md b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/README.md similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/README.md rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/README.md diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/checkpoint.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/checkpoint.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/checkpoint.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/checkpoint.py diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/classify.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/classify.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/classify.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/classify.py diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/agnews_data.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/agnews_data.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/agnews_data.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/agnews_data.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/bert_base.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/bert_base.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/bert_base.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/bert_base.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/eval.json 
b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/eval.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/eval.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/optim.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/optim.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/train.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/train.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/train.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/agnews/train.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/eval.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/eval.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/eval.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/optim.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/optim.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/train.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/train.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/train.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/blendcnn/mrpc/train.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/eval.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/eval.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/eval.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/optim.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/optim.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/train.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/train.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/train.json rename to 
examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/agnews/train.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/eval.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/eval.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/eval.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/optim.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/optim.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/train.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/train.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/train.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/distill/mrpc/train.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/eval.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/eval.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/eval.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/optim.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/optim.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/train.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/train.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/train.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/agnews/train.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/eval.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/eval.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/eval.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/optim.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/optim.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/train.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/train.json similarity index 100% rename from 
examples/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/train.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/finetune/mrpc/train.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/mrpc_data.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/mrpc_data.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/mrpc_data.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/mrpc_data.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/config/pretrain.json b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/pretrain.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/config/pretrain.json rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/config/pretrain.json diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/data.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/data.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/data.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/data.py diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/distill.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/distill.py similarity index 99% rename from examples/pytorch/nlp/blendcnn/distillation/eager/distill.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/distill.py index 0ef4f649dc7..59c5a2bc7dc 100644 --- a/examples/pytorch/nlp/blendcnn/distillation/eager/distill.py +++ b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/distill.py @@ -69,8 +69,8 @@ def get_data_iterator(mode='train'): data.Tokenizing(tokenizer.convert_to_unicode, tokenizer.tokenize), data.AddSpecialTokensWithTruncation(cfg_data.max_len), data.TokenIndexing(tokenizer.convert_tokens_to_ids, - TaskDataset.labels, - cfg_data.max_len) + TaskDataset.labels, + cfg_data.max_len) ], n_data=None) tensors = TensorDataset(*dataset.get_tensors()) # To Tensors data_iter = DataLoader(tensors, batch_size=cfg_optim.batch_size, shuffle=False) diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/finetune.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/finetune.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/finetune.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/finetune.py diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/models.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/models.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/models.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/models.py diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/optim.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/optim.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/optim.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/optim.py diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/pretrain.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/pretrain.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/pretrain.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/pretrain.py diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/tokenization.py 
b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/tokenization.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/tokenization.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/tokenization.py diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/trainer.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/trainer.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/trainer.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/trainer.py diff --git a/examples/pytorch/nlp/blendcnn/distillation/eager/utils.py b/examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/utils.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/distillation/eager/utils.py rename to examples/deprecated/pytorch/nlp/blendcnn/distillation/eager/utils.py diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/LICENSE b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/LICENSE similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/LICENSE rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/LICENSE diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/README.md b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/README.md similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/README.md rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/README.md diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/checkpoint.py b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/checkpoint.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/checkpoint.py rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/checkpoint.py diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/classify.py b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/classify.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/classify.py rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/classify.py diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/agnews_data.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/agnews_data.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/agnews_data.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/agnews_data.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/bert_base.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/bert_base.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/bert_base.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/bert_base.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/eval.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/eval.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/eval.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/optim.json 
b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/optim.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/train.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/train.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/train.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/agnews/train.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/eval.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/eval.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/eval.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/optim.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/optim.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/train.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/train.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/train.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/blendcnn/mrpc/train.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/eval.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/eval.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/eval.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/optim.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/optim.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/train.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/train.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/train.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/agnews/train.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/eval.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/eval.json similarity index 100% rename from 
examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/eval.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/eval.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/optim.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/optim.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/optim.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/optim.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/train.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/train.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/train.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/finetune/mrpc/train.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/mrpc_data.json b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/mrpc_data.json similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/mrpc_data.json rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/config/mrpc_data.json diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/data.py b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/data.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/data.py rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/data.py diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/models.py b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/models.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/models.py rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/models.py diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/optim.py b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/optim.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/optim.py rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/optim.py diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/requirements.txt b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/requirements.txt similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/requirements.txt rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/requirements.txt diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/run_benchmark.sh b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/run_benchmark.sh diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/run_quant.sh b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/run_quant.sh rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/run_quant.sh diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/tokenization.py 
b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/tokenization.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/tokenization.py rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/tokenization.py diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/trainer.py b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/trainer.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/trainer.py rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/trainer.py diff --git a/examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/utils.py b/examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/utils.py similarity index 100% rename from examples/pytorch/nlp/blendcnn/quantization/ptq/ipex/utils.py rename to examples/deprecated/pytorch/nlp/blendcnn/quantization/ptq/ipex/utils.py diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/README.md diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_no_trainer.py b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_no_trainer.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_no_trainer.py rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_no_trainer.py diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_sparsegpt.py b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_sparsegpt.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_sparsegpt.py rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/run_clm_sparsegpt.py diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_bloom_pruning.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_bloom_pruning.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_bloom_pruning.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_bloom_pruning.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_gptj_pruning.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_gptj_pruning.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_gptj_pruning.sh rename to 
examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_gptj_pruning.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_llama_pruning.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_llama_pruning.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_llama_pruning.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_llama_pruning.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_llm_sparsegpt.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_llm_sparsegpt.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_llm_sparsegpt.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_llm_sparsegpt.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_opt_pruning.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_opt_pruning.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_opt_pruning.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/scripts/run_opt_pruning.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/timers.py b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/timers.py similarity index 96% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/timers.py rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/timers.py index ee4999c460a..5bed4d86dff 100644 --- a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/timers.py +++ b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager/timers.py @@ -1,33 +1,33 @@ -import time -import torch -class CPUTimer: - def __init__(self, timelogs): - self.timelogs = timelogs - - def __enter__(self): - self.start = time.time() - - def __exit__(self): - end = time.time() - self.timelogs.append((end - self.start) * 1000) # ms - - def get_avg_time(self): - return sum(self.timelogs) / len(self.timelogs) - -class GPUTimer: - def __init__(self, timelogs): - self.timelogs = timelogs - - def __enter__(self): - self.start_event = torch.cuda.Event(enable_timing=True) - self.end_event = torch.cuda.Event(enable_timing=True) - self.start_event.record() - - def __exit__(self): - self.end_event.record() - self.end_event.synchronize() - elapsed_time = self.start_event.elapsed_time(self.end_event) - self.timelogs.append(elapsed_time) - - def get_avg_time(self): +import time +import torch +class CPUTimer: + def __init__(self, timelogs): + self.timelogs = timelogs + + def __enter__(self): + self.start = time.time() + + def __exit__(self): + end = time.time() + self.timelogs.append((end - self.start) * 1000) # ms + + def get_avg_time(self): + return sum(self.timelogs) / len(self.timelogs) + +class GPUTimer: + def __init__(self, timelogs): + self.timelogs = timelogs + + def __enter__(self): + self.start_event = torch.cuda.Event(enable_timing=True) + self.end_event = torch.cuda.Event(enable_timing=True) + 
self.start_event.record() + + def __exit__(self): + self.end_event.record() + self.end_event.synchronize() + elapsed_time = self.start_event.elapsed_time(self.end_event) + self.timelogs.append(elapsed_time) + + def get_avg_time(self): return sum(self.timelogs) / len(self.timelogs) \ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/README.md diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/config/zero_stage2_config.json b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/config/zero_stage2_config.json similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/config/zero_stage2_config.json rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/config/zero_stage2_config.json diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/config/zero_stage3_config.json b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/config/zero_stage3_config.json similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/config/zero_stage3_config.json rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/config/zero_stage3_config.json diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_clm_no_trainer.py b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_clm_no_trainer.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_clm_no_trainer.py rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_clm_no_trainer.py diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_clm_no_trainer_deepspeed.py b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_clm_no_trainer_deepspeed.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_clm_no_trainer_deepspeed.py rename to 
examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_clm_no_trainer_deepspeed.py diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_ds.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_ds.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_ds.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_ds.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_ds_z3.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_ds_z3.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_ds_z3.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/multi_cards/run_ds_z3.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/README.md diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/cnn_dm_dataset.py b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/cnn_dm_dataset.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/cnn_dm_dataset.py rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/cnn_dm_dataset.py diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.py b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.py similarity index 100% rename from 
examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.py rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.py diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_gptj_mlperf_int4.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/README.md similarity index 97% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/README.md index 04fe3b3660d..bf10995b01b 100644 --- a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/README.md +++ b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/README.md @@ -92,4 +92,4 @@ quantized_model = load(tuned_checkpoint, model) ``` -------- -For more details, please refer to the [sample code](./run_clm.py). +For more details, please refer to the [sample code](run_clm.py). 
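For context, the README hunk just above ends at the step that reloads the tuned checkpoint via `quantized_model = load(tuned_checkpoint, model)`. The following is a minimal, self-contained sketch of that step using the 2.x `neural_compressor.utils.pytorch.load` helper; the model name and checkpoint directory are illustrative assumptions, not values taken from this diff.

```python
# Hypothetical sketch: reload an INC-tuned checkpoint on top of the original FP32 model.
# "./saved_results" and "gpt2" are assumed placeholder values for illustration only.
from transformers import AutoModelForCausalLM
from neural_compressor.utils.pytorch import load

fp32_model = AutoModelForCausalLM.from_pretrained("gpt2")  # FP32 model that was tuned
tuned_checkpoint = "./saved_results"                       # directory written by the tuning run

# load() re-applies the saved quantization configuration and weights to the FP32 model
quantized_model = load(tuned_checkpoint, fp32_model)
quantized_model.eval()
```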
diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_clm.py b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_clm.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_clm.py rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_clm.py diff --git a/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_quant.sh b/examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/quantization/ptq_static/fx/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/load_model_hub.py b/examples/deprecated/pytorch/nlp/huggingface_models/load_model_hub.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/load_model_hub.py rename to examples/deprecated/pytorch/nlp/huggingface_models/load_model_hub.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/distillation/eager/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/distillation/eager/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/distillation/eager/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/distillation/eager/README.md diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/distillation/eager/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/distillation/eager/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/distillation/eager/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/distillation/eager/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/distillation/eager/run_qa_no_trainer_distillation.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/distillation/eager/run_qa_no_trainer_distillation.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/distillation/eager/run_qa_no_trainer_distillation.py rename to 
examples/deprecated/pytorch/nlp/huggingface_models/question-answering/distillation/eager/run_qa_no_trainer_distillation.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/distillation/eager/utils_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/distillation/eager/utils_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/distillation/eager/utils_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/distillation/eager/utils_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/README.md diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/run_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/run_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/run_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/run_qa.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/trainer_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/trainer_qa.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/trainer_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/trainer_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/utils_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/utils_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/utils_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/utils_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/model_slim/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/README.md similarity index 98% rename from 
examples/pytorch/nlp/huggingface_models/question-answering/model_slim/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/README.md index 796fa6e02fa..5ad4e873372 100644 --- a/examples/pytorch/nlp/huggingface_models/question-answering/model_slim/README.md +++ b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/README.md @@ -1,47 +1,47 @@ -# Step by Step - -## Channel Pruning for Consecutive Linear Layers -An interesting thing for pruning is that if we do [channel pruning](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner#pruning-patterns) for some linear layers in NLP models, we can permanently remove these all-zero channels without changing their accuracy. - -To be specific, if a model has two consecutive linear layers, which is common in both **Bert series** and **GPT series** models' FFN parts, and we conduct the input channel pruning for the second linear layer (masking weights by column). We can remove these all-zero channels. Plus, we also remove the same indices' output channels in the first linear layers (masking weights by row), since their contribution for activation will be masked by the second layer's. - -This leads to no change for model's accuracy, but can obtain a significant acceleration for model's inference, because the transformer models' FFN parts take nearly 50% of entire computing overhead. Thus, compressing weights in FFN parts is really useful. - -## Multi-head Pruning for Self-Attention Layers -Self attention modules are common in all Transformer-based models. These models use multi-head attention (also known as MHA) to enhance their abilities of linking contextual information. Transformer-based models usually stack a sequence of MHA modules, and this makes MHA takes a noticeable storage and memory bandwidth. As an optimization method, head pruning removes attention heads which make minor contribution to model's contextual analysis. This method does not lead to much accuracy loss, but provides us with much opportunity for model acceleration. - -## API for Consecutive Linear Layers and Multi-head attention Slim. -We provide API functions for you to complete the process above and slim your transformer models easily. Here is how to call our API functions. Simply provide a target sparsity value to our Our API function **parse_auto_slim_config** and it can generate the [pruning_configs](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner#get-started-with-pruning-api) used by our pruning API. Such process is fully automatic and target linear layers will be included without manual setting. After pruning process finished, use API function **model_slim** to slim the model. - -```python -# auto slim config -# part1 generate pruning configs for the second linear layers. -pruning_configs = [] -from neural_compressor.compression.pruner import parse_auto_slim_config -auto_slim_configs = parse_auto_slim_config( - model, - ffn2_sparsity = prune_ffn2_sparsity, # define target sparsity with a float between 0 and 1 - mha_sparsity = prune_mha_sparsity, # define target sparsity with a float between 0 and 1 -) -pruning_configs += auto_slim_configs - -################ -""" -# Training codes. -...... 
-""" -################ - -from neural_compressor.compression.pruner import model_slim -model = model_slim(model) -``` -Please noted that if you already have a sparse model which corresponding linear layers pruned, you can simply call the last two lines to complete the model slim. - -## Run Examples -We provides an example of Bert-Base to demonstrate how we slim Transformer-based models. In this example, we simultaneously prune the searched feed forward networks and multi-head attention modules to obtain the best acceleration performance. Simply run the following script: -```bash -sh run_qa_auto_slim.sh -``` -After FFN compression, the inference speed of the model will be significantly improved on both CPU and GPU. - -For more information about pruning, please refer to our [INC Pruning API](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner). +# Step by Step + +## Channel Pruning for Consecutive Linear Layers +A useful property of [channel pruning](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner#pruning-patterns) is that, when it is applied to some linear layers in NLP models, the resulting all-zero channels can be removed permanently without changing the model's accuracy. + +To be specific, if a model has two consecutive linear layers, which is common in the FFN parts of both **Bert series** and **GPT series** models, and we prune the input channels of the second linear layer (masking its weights by column), we can remove these all-zero channels. In addition, we can remove the output channels with the same indices in the first linear layer (masking its weights by row), since their contribution to the activation is masked by the second layer anyway. + +This does not change the model's accuracy, but it yields a significant inference speedup, because the FFN parts of transformer models account for nearly 50% of the total compute. Compressing the FFN weights is therefore highly effective. + +## Multi-head Pruning for Self-Attention Layers +Self-attention modules are common to all Transformer-based models. These models use multi-head attention (MHA) to capture contextual information, and they usually stack a sequence of MHA modules, so MHA accounts for a noticeable share of storage and memory bandwidth. As an optimization, head pruning removes attention heads that make only a minor contribution to the model's contextual analysis. This method causes little accuracy loss while offering significant opportunity for model acceleration. + +## API for Consecutive Linear Layer and Multi-head Attention Slimming +We provide API functions to complete the process above and slim your transformer models easily. Here is how to call them. Simply provide a target sparsity value to the API function **parse_auto_slim_config** and it generates the [pruning_configs](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner#get-started-with-pruning-api) used by our pruning API. This process is fully automatic: the target linear layers are discovered without any manual configuration. After the pruning process finishes, use the API function **model_slim** to slim the model. + +```python +# auto slim config +# part1 generate pruning configs for the second linear layers. 
+pruning_configs = [] +from neural_compressor.compression.pruner import parse_auto_slim_config +auto_slim_configs = parse_auto_slim_config( + model, + ffn2_sparsity = prune_ffn2_sparsity, # define target sparsity with a float between 0 and 1 + mha_sparsity = prune_mha_sparsity, # define target sparsity with a float between 0 and 1 +) +pruning_configs += auto_slim_configs + +################ +""" +# Training codes. +...... +""" +################ + +from neural_compressor.compression.pruner import model_slim +model = model_slim(model) +``` +Please note that if you already have a sparse model whose corresponding linear layers are pruned, you can simply call the last two lines to complete the model slimming. + +## Run Examples +We provide an example of Bert-Base to demonstrate how we slim Transformer-based models. In this example, we simultaneously prune the discovered feed-forward networks and multi-head attention modules to obtain the best acceleration performance. Simply run the following script: +```bash +sh run_qa_auto_slim.sh +``` +After FFN compression, the inference speed of the model will be significantly improved on both CPU and GPU. + +For more information about pruning, please refer to our [INC Pruning API](https://github.com/intel/neural-compressor/tree/master/neural_compressor/compression/pruner). diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/model_slim/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/model_slim/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_auto_slim_bert-base.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_auto_slim_bert-base.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_auto_slim_bert-base.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_auto_slim_bert-base.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_auto_slim_bert-large.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_auto_slim_bert-large.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_auto_slim_bert-large.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_auto_slim_bert-large.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_no_trainer_auto_slim.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_no_trainer_auto_slim.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_no_trainer_auto_slim.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/run_qa_no_trainer_auto_slim.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/model_slim/timers.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/timers.py similarity index 96% rename from examples/pytorch/nlp/huggingface_models/question-answering/model_slim/timers.py rename to 
examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/timers.py index ee4999c460a..5bed4d86dff 100644 --- a/examples/pytorch/nlp/huggingface_models/question-answering/model_slim/timers.py +++ b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/timers.py @@ -1,33 +1,33 @@ -import time -import torch -class CPUTimer: - def __init__(self, timelogs): - self.timelogs = timelogs - - def __enter__(self): - self.start = time.time() - - def __exit__(self): - end = time.time() - self.timelogs.append((end - self.start) * 1000) # ms - - def get_avg_time(self): - return sum(self.timelogs) / len(self.timelogs) - -class GPUTimer: - def __init__(self, timelogs): - self.timelogs = timelogs - - def __enter__(self): - self.start_event = torch.cuda.Event(enable_timing=True) - self.end_event = torch.cuda.Event(enable_timing=True) - self.start_event.record() - - def __exit__(self): - self.end_event.record() - self.end_event.synchronize() - elapsed_time = self.start_event.elapsed_time(self.end_event) - self.timelogs.append(elapsed_time) - - def get_avg_time(self): +import time +import torch +class CPUTimer: + def __init__(self, timelogs): + self.timelogs = timelogs + + def __enter__(self): + self.start = time.time() + + def __exit__(self): + end = time.time() + self.timelogs.append((end - self.start) * 1000) # ms + + def get_avg_time(self): + return sum(self.timelogs) / len(self.timelogs) + +class GPUTimer: + def __init__(self, timelogs): + self.timelogs = timelogs + + def __enter__(self): + self.start_event = torch.cuda.Event(enable_timing=True) + self.end_event = torch.cuda.Event(enable_timing=True) + self.start_event.record() + + def __exit__(self): + self.end_event.record() + self.end_event.synchronize() + elapsed_time = self.start_event.elapsed_time(self.end_event) + self.timelogs.append(elapsed_time) + + def get_avg_time(self): return sum(self.timelogs) / len(self.timelogs) \ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/model_slim/utils_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/utils_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/model_slim/utils_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/utils_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/README.md diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/requirements.txt diff --git 
a/examples/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/run_qa_no_trainer_pruneOFA.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/run_qa_no_trainer_pruneOFA.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/run_qa_no_trainer_pruneOFA.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/run_qa_no_trainer_pruneOFA.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/utils_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/utils_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/utils_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/optimization_pipeline/prune_once_for_all/fx/utils_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/README.md diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/run_qa_no_trainer.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/run_qa_no_trainer.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/run_qa_no_trainer.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/run_qa_no_trainer.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/run_qa_no_trainer_block.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/run_qa_no_trainer_block.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/run_qa_no_trainer_block.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/run_qa_no_trainer_block.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertbase_squad_4x1.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertbase_squad_4x1.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertbase_squad_4x1.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertbase_squad_4x1.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertlarge_squad_4x1.sh 
b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertlarge_squad_4x1.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertlarge_squad_4x1.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertlarge_squad_4x1.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_dense_fintune.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_dense_fintune.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_dense_fintune.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_dense_fintune.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_squad_2in4.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_squad_2in4.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_squad_2in4.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_squad_2in4.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_squad_4x1.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_squad_4x1.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_squad_4x1.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/bertmini_squad_4x1.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/distilbert_squad_4x1.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/distilbert_squad_4x1.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/distilbert_squad_4x1.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/scripts/distilbert_squad_4x1.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/utils_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/utils_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager/utils_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager/utils_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/README.md diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/requirements.txt similarity index 100% rename from 
examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_qa.py similarity index 97% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_qa.py index 7ef1da864e8..a6fe14c6cab 100644 --- a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_qa.py +++ b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_qa.py @@ -1,742 +1,742 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2020 The HuggingFace Team All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -Fine-tuning the library models for question answering. -""" -# You can also adapt this script on your own question answering task. Pointers for this are left as comments. - -import logging -import os -import sys -from dataclasses import dataclass, field -from typing import Optional -import timeit - -import datasets -from datasets import load_dataset, load_metric - -import transformers -from trainer_qa import QuestionAnsweringTrainer -from transformers import ( - AutoConfig, - AutoModelForQuestionAnswering, - AutoTokenizer, - DataCollatorWithPadding, - EvalPrediction, - HfArgumentParser, - PreTrainedTokenizerFast, - TrainingArguments, - default_data_collator, - set_seed, -) -from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version -from transformers.utils.versions import require_version -from utils_qa import postprocess_qa_predictions - - -# Will error if the minimal version of Transformers is not installed. Remove at your own risks. -check_min_version("4.10.0") - -require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt") - -logger = logging.getLogger(__name__) - - -@dataclass -class ModelArguments: - """ - Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
- """ - - model_name_or_path: str = field( - metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} - ) - config_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} - ) - tokenizer_name: Optional[str] = field( - default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} - ) - cache_dir: Optional[str] = field( - default=None, - metadata={"help": "Path to directory to store the pretrained models downloaded from huggingface.co"}, - ) - model_revision: str = field( - default="main", - metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, - ) - use_auth_token: bool = field( - default=False, - metadata={ - "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " - "with private models)." - }, - ) - tune: bool = field( - default=False, - metadata={"help": "Whether or not to apply quantization."}, - ) - int8: bool = field( - default=False, metadata={"help": "use int8 model to get accuracy or benchmark"} - ) - performance: bool = field( - default=False, metadata={"help": "get benchmark instead of accuracy"} - ) - accuracy: bool = field( - default=False, metadata={"help": "get accuracy"} - ) - iters: int = field( - default=100, - metadata={ - "help": "The inference iterations to run for benchmark." - }, - ) - - -@dataclass -class DataTrainingArguments: - """ - Arguments pertaining to what data we are going to input our model for training and eval. - """ - - dataset_name: Optional[str] = field( - default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} - ) - dataset_config_name: Optional[str] = field( - default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} - ) - train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."}) - validation_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."}, - ) - test_file: Optional[str] = field( - default=None, - metadata={"help": "An optional input test data file to evaluate the perplexity on (a text file)."}, - ) - overwrite_cache: bool = field( - default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} - ) - preprocessing_num_workers: Optional[int] = field( - default=None, - metadata={"help": "The number of processes to use for the preprocessing."}, - ) - max_seq_length: int = field( - default=384, - metadata={ - "help": "The maximum total input sequence length after tokenization. Sequences longer " - "than this will be truncated, sequences shorter will be padded." - }, - ) - pad_to_max_length: bool = field( - default=True, - metadata={ - "help": "Whether to pad all samples to `max_seq_length`. " - "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " - "be faster on GPU but will be slower on TPU)." - }, - ) - max_train_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of training examples to this " - "value if set." 
- }, - ) - max_eval_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " - "value if set." - }, - ) - max_predict_samples: Optional[int] = field( - default=None, - metadata={ - "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " - "value if set." - }, - ) - version_2_with_negative: bool = field( - default=False, metadata={"help": "If true, some of the examples do not have an answer."} - ) - null_score_diff_threshold: float = field( - default=0.0, - metadata={ - "help": "The threshold used to select the null answer: if the best answer has a score that is less than " - "the score of the null answer minus this threshold, the null answer is selected for this example. " - "Only useful when `version_2_with_negative=True`." - }, - ) - doc_stride: int = field( - default=128, - metadata={"help": "When splitting up a long document into chunks, how much stride to take between chunks."}, - ) - n_best_size: int = field( - default=20, - metadata={"help": "The total number of n-best predictions to generate when looking for an answer."}, - ) - max_answer_length: int = field( - default=30, - metadata={ - "help": "The maximum length of an answer that can be generated. This is needed because the start " - "and end predictions are not conditioned on one another." - }, - ) - - def __post_init__(self): - if ( - self.dataset_name is None - and self.train_file is None - and self.validation_file is None - and self.test_file is None - ): - raise ValueError("Need either a dataset name or a training/validation file/test_file.") - else: - if self.train_file is not None: - extension = self.train_file.split(".")[-1] - assert extension in ["csv", "json"], "`train_file` should be a csv or a json file." - if self.validation_file is not None: - extension = self.validation_file.split(".")[-1] - assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." - if self.test_file is not None: - extension = self.test_file.split(".")[-1] - assert extension in ["csv", "json"], "`test_file` should be a csv or a json file." - - -def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. - # We now keep distinct sets of args, for a cleaner separation of concerns. - - parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) - if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): - # If we pass only one argument to the script and it's the path to a json file, - # let's parse it to get our arguments. 
- model_args, data_args, training_args = parser.parse_json_file( - json_file=os.path.abspath(sys.argv[1]) - ) - else: - model_args, data_args, training_args = parser.parse_args_into_dataclasses() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - log_level = training_args.get_process_log_level() - logger.setLevel(log_level) - datasets.utils.logging.set_verbosity(log_level) - transformers.utils.logging.set_verbosity(log_level) - transformers.utils.logging.enable_default_handler() - transformers.utils.logging.enable_explicit_format() - - # Log on each process the small summary: - logger.warning( - f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" - + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" - ) - logger.info(f"Training/evaluation parameters {training_args}") - - # Detecting last checkpoint. - last_checkpoint = None - if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: - last_checkpoint = get_last_checkpoint(training_args.output_dir) - if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: - raise ValueError( - f"Output directory ({training_args.output_dir}) already exists and is not empty. " - "Use --overwrite_output_dir to overcome." - ) - elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: - logger.info( - f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " - "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." - ) - - # Set seed before initializing model. - set_seed(training_args.seed) - - # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) - # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ - # (the dataset will be downloaded automatically from the datasets Hub). - # - # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called - # 'text' is found. You can easily tweak this behavior (see below). - # - # In distributed training, the load_dataset function guarantee that only one local process can concurrently - # download the dataset. - if data_args.dataset_name is not None: - # Downloading and loading a dataset from the hub. - raw_datasets = load_dataset( - data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir - ) - else: - data_files = {} - if data_args.train_file is not None: - data_files["train"] = data_args.train_file - extension = data_args.train_file.split(".")[-1] - - if data_args.validation_file is not None: - data_files["validation"] = data_args.validation_file - extension = data_args.validation_file.split(".")[-1] - if data_args.test_file is not None: - data_files["test"] = data_args.test_file - extension = data_args.test_file.split(".")[-1] - raw_datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) - # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at - # https://huggingface.co/docs/datasets/loading_datasets.html. 
- - # Load pretrained model and tokenizer - # - # Distributed training: - # The .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. - config = AutoConfig.from_pretrained( - model_args.config_name if model_args.config_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - tokenizer = AutoTokenizer.from_pretrained( - model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, - cache_dir=model_args.cache_dir, - use_fast=True, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - model = AutoModelForQuestionAnswering.from_pretrained( - model_args.model_name_or_path, - from_tf=bool(".ckpt" in model_args.model_name_or_path), - config=config, - cache_dir=model_args.cache_dir, - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) - - # Tokenizer check: this script requires a fast tokenizer. - if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( - "This example script only works for models that have a fast tokenizer. Checkout the big table of models " - "at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this " - "requirement" - ) - - # Preprocessing the datasets. - # Preprocessing is slightly different for training and evaluation. - if training_args.do_train: - column_names = raw_datasets["train"].column_names - elif training_args.do_eval: - column_names = raw_datasets["validation"].column_names - else: - column_names = raw_datasets["test"].column_names - question_column_name = "question" if "question" in column_names else column_names[0] - context_column_name = "context" if "context" in column_names else column_names[1] - answer_column_name = "answers" if "answers" in column_names else column_names[2] - - # Padding side determines if we do (question|context) or (context|question). - pad_on_right = tokenizer.padding_side == "right" - - if data_args.max_seq_length > tokenizer.model_max_length: - logger.warning( - f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" - f"model ({tokenizer.model_max_length}). Using max_seq_length={tokenizer.model_max_length}." - ) - max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) - - # Training preprocessing - def prepare_train_features(examples): - # Some of the questions have lots of whitespace on the left, which is not useful and will make the - # truncation of the context fail (the tokenized question will take a lots of space). So we remove that - # left whitespace - examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] - - # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results - # in one example possible giving several features when a context is long, each of those features having a - # context that overlaps a bit the context of the previous feature. 
- tokenized_examples = tokenizer( - examples[question_column_name if pad_on_right else context_column_name], - examples[context_column_name if pad_on_right else question_column_name], - truncation="only_second" if pad_on_right else "only_first", - max_length=max_seq_length, - stride=data_args.doc_stride, - return_overflowing_tokens=True, - return_offsets_mapping=True, - padding="max_length" if data_args.pad_to_max_length else False, - ) - - # Since one example might give us several features if it has a long context, we need a map from a feature to - # its corresponding example. This key gives us just that. - sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") - # The offset mappings will give us a map from token to character position in the original context. This will - # help us compute the start_positions and end_positions. - offset_mapping = tokenized_examples.pop("offset_mapping") - - # Let's label those examples! - tokenized_examples["start_positions"] = [] - tokenized_examples["end_positions"] = [] - - for i, offsets in enumerate(offset_mapping): - # We will label impossible answers with the index of the CLS token. - input_ids = tokenized_examples["input_ids"][i] - cls_index = input_ids.index(tokenizer.cls_token_id) - - # Grab the sequence corresponding to that example (to know what is the context and what is the question). - sequence_ids = tokenized_examples.sequence_ids(i) - - # One example can give several spans, this is the index of the example containing this span of text. - sample_index = sample_mapping[i] - answers = examples[answer_column_name][sample_index] - # If no answers are given, set the cls_index as answer. - if len(answers["answer_start"]) == 0: - tokenized_examples["start_positions"].append(cls_index) - tokenized_examples["end_positions"].append(cls_index) - else: - # Start/end character index of the answer in the text. - start_char = answers["answer_start"][0] - end_char = start_char + len(answers["text"][0]) - - # Start token index of the current span in the text. - token_start_index = 0 - while sequence_ids[token_start_index] != (1 if pad_on_right else 0): - token_start_index += 1 - - # End token index of the current span in the text. - token_end_index = len(input_ids) - 1 - while sequence_ids[token_end_index] != (1 if pad_on_right else 0): - token_end_index -= 1 - - # Detect if the answer is out of the span (in which case this feature is labeled with the CLS index). - if not (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char): - tokenized_examples["start_positions"].append(cls_index) - tokenized_examples["end_positions"].append(cls_index) - else: - # Otherwise move the token_start_index and token_end_index to the two ends of the answer. - # Note: we could go after the last offset if the answer is the last word (edge case). 
- while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char: - token_start_index += 1 - tokenized_examples["start_positions"].append(token_start_index - 1) - while offsets[token_end_index][1] >= end_char: - token_end_index -= 1 - tokenized_examples["end_positions"].append(token_end_index + 1) - - return tokenized_examples - - if training_args.do_train: - if "train" not in raw_datasets: - raise ValueError("--do_train requires a train dataset") - train_dataset = raw_datasets["train"] - if data_args.max_train_samples is not None: - # We will select sample from whole data if argument is specified - train_dataset = train_dataset.select(range(data_args.max_train_samples)) - # Create train feature from dataset - with training_args.main_process_first(desc="train dataset map pre-processing"): - train_dataset = train_dataset.map( - prepare_train_features, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on train dataset", - ) - if data_args.max_train_samples is not None: - # Number of samples might increase during Feature Creation, We select only specified max samples - train_dataset = train_dataset.select(range(data_args.max_train_samples)) - - # Validation preprocessing - def prepare_validation_features(examples): - # Some of the questions have lots of whitespace on the left, which is not useful and will make the - # truncation of the context fail (the tokenized question will take a lots of space). So we remove that - # left whitespace - examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] - - # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results - # in one example possible giving several features when a context is long, each of those features having a - # context that overlaps a bit the context of the previous feature. - tokenized_examples = tokenizer( - examples[question_column_name if pad_on_right else context_column_name], - examples[context_column_name if pad_on_right else question_column_name], - truncation="only_second" if pad_on_right else "only_first", - max_length=max_seq_length, - stride=data_args.doc_stride, - return_overflowing_tokens=True, - return_offsets_mapping=True, - padding="max_length" if data_args.pad_to_max_length else False, - ) - - # Since one example might give us several features if it has a long context, we need a map from a feature to - # its corresponding example. This key gives us just that. - sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") - - # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the - # corresponding example_id and we will store the offset mappings. - tokenized_examples["example_id"] = [] - - for i in range(len(tokenized_examples["input_ids"])): - # Grab the sequence corresponding to that example (to know what is the context and what is the question). - sequence_ids = tokenized_examples.sequence_ids(i) - context_index = 1 if pad_on_right else 0 - - # One example can give several spans, this is the index of the example containing this span of text. - sample_index = sample_mapping[i] - tokenized_examples["example_id"].append(examples["id"][sample_index]) - - # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token - # position is part of the context or not. 
- tokenized_examples["offset_mapping"][i] = [ - (o if sequence_ids[k] == context_index else None) - for k, o in enumerate(tokenized_examples["offset_mapping"][i]) - ] - - return tokenized_examples - - if training_args.do_eval: - if "validation" not in raw_datasets: - raise ValueError("--do_eval requires a validation dataset") - eval_examples = raw_datasets["validation"] - if data_args.max_eval_samples is not None: - # We will select sample from whole data - eval_examples = eval_examples.select(range(data_args.max_eval_samples)) - # Validation Feature Creation - with training_args.main_process_first(desc="validation dataset map pre-processing"): - eval_dataset = eval_examples.map( - prepare_validation_features, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on validation dataset", - ) - if data_args.max_eval_samples is not None: - # During Feature creation dataset samples might increase, we will select required samples again - eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) - if training_args.dataloader_drop_last: - data_lens = len(eval_dataset) - data_lens -= data_lens % training_args.per_device_eval_batch_size - eval_dataset = eval_dataset.select(range(data_lens)) - - if training_args.do_predict: - if "test" not in raw_datasets: - raise ValueError("--do_predict requires a test dataset") - predict_examples = raw_datasets["test"] - if data_args.max_predict_samples is not None: - # We will select sample from whole data - predict_examples = predict_examples.select(range(data_args.max_predict_samples)) - # Predict Feature Creation - with training_args.main_process_first(desc="prediction dataset map pre-processing"): - predict_dataset = predict_examples.map( - prepare_validation_features, - batched=True, - num_proc=data_args.preprocessing_num_workers, - remove_columns=column_names, - load_from_cache_file=not data_args.overwrite_cache, - desc="Running tokenizer on prediction dataset", - ) - if data_args.max_predict_samples is not None: - # During Feature creation dataset samples might increase, we will select required samples again - predict_dataset = predict_dataset.select(range(data_args.max_predict_samples)) - - # Data collator - # We have already padded to max length if the corresponding flag is True, otherwise we need to pad in the data - # collator. - data_collator = ( - default_data_collator - if data_args.pad_to_max_length - else DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None) - ) - - # Post-processing: - def post_processing_function(examples, features, predictions, stage="eval"): - # Post-processing: we match the start logits and end logits to answers in the original context. - predictions = postprocess_qa_predictions( - examples=examples, - features=features, - predictions=predictions, - version_2_with_negative=data_args.version_2_with_negative, - n_best_size=data_args.n_best_size, - max_answer_length=data_args.max_answer_length, - null_score_diff_threshold=data_args.null_score_diff_threshold, - output_dir=training_args.output_dir, - log_level=log_level, - prefix=stage, - ) - # Format the result to the format the metric expects. 
- if data_args.version_2_with_negative: - formatted_predictions = [ - {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items() - ] - else: - formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()] - - references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples] - return EvalPrediction(predictions=formatted_predictions, label_ids=references) - - metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad") - - def compute_metrics(p: EvalPrediction): - return metric.compute(predictions=p.predictions, references=p.label_ids) - - # Initialize our Trainer - trainer = QuestionAnsweringTrainer( - model=model, - args=training_args, - train_dataset=train_dataset if training_args.do_train else None, - eval_dataset=eval_dataset if training_args.do_eval else None, - eval_examples=eval_examples if training_args.do_eval else None, - tokenizer=tokenizer, - data_collator=data_collator, - post_process_function=post_processing_function, - compute_metrics=compute_metrics, - ) - - eval_dataloader = trainer.get_eval_dataloader() - # transformer issue #1 - # for transformers 4.31.0: accelerate dataloader - # *** ValueError: batch_size attribute should not be set - # after DataLoaderShard is initialized - if eval_dataloader.batch_size is None: - def _build_inc_dataloader(dataloader): - class INCDataLoader: - __iter__ = dataloader.__iter__ - def __init__(self) -> None: - self.dataloader = dataloader - self.batch_size = dataloader.total_batch_size - return INCDataLoader() - eval_dataloader = _build_inc_dataloader(eval_dataloader) - batch_size = eval_dataloader.batch_size - metric_name = "eval_f1" - - def take_eval_steps(model, trainer, metric_name, save_metrics=False): - - trainer.model = model - start_time = timeit.default_timer() - metrics = trainer.evaluate() - evalTime = timeit.default_timer() - start_time - max_eval_samples = data_args.max_eval_samples \ - if data_args.max_eval_samples is not None else len(eval_dataset) - eval_samples = min(max_eval_samples, len(eval_dataset)) - samples = eval_samples - (eval_samples % batch_size) \ - if training_args.dataloader_drop_last else eval_samples - if save_metrics: - trainer.save_metrics("eval", metrics) - logger.info("metrics keys: {}".format(metrics.keys())) - print('Batch size = %d' % batch_size) - print("Finally Eval {} Accuracy: {}".format(metric_name, metrics.get(metric_name))) - print("Latency: %.3f ms" % (evalTime / samples * 1000)) - print("Throughput: {} samples/sec".format(samples / evalTime)) - return metrics.get(metric_name) - - def eval_func(model): - return take_eval_steps(model, trainer, metric_name) - - if model_args.tune: - - if not training_args.do_eval: - raise ValueError("do_eval must be set to True for quantization.") - - from neural_compressor.config import PostTrainingQuantConfig - from neural_compressor import quantization - conf = PostTrainingQuantConfig() - q_model = quantization.fit(model, - conf, - calib_dataloader=eval_dataloader, - eval_func=eval_func) - q_model.save(training_args.output_dir) - return - - if model_args.int8: - from neural_compressor.utils.pytorch import load - model = load(training_args.output_dir, model, dataloader=eval_dataloader) - if model_args.performance or model_args.accuracy: - if model_args.performance: - from neural_compressor.config import BenchmarkConfig - from neural_compressor import benchmark - b_conf = BenchmarkConfig(warmup=5, - iteration=model_args.iters, - cores_per_instance=4, - 
num_of_instance=1) - benchmark.fit(model, b_conf, b_dataloader=eval_dataloader) - else: - eval_func(model) - return - - # Training - if training_args.do_train: - checkpoint = None - if training_args.resume_from_checkpoint is not None: - checkpoint = training_args.resume_from_checkpoint - elif last_checkpoint is not None: - checkpoint = last_checkpoint - train_result = trainer.train(resume_from_checkpoint=checkpoint) - trainer.save_model() # Saves the tokenizer too for easy upload - - metrics = train_result.metrics - max_train_samples = ( - data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) - ) - metrics["train_samples"] = min(max_train_samples, len(train_dataset)) - - trainer.log_metrics("train", metrics) - trainer.save_metrics("train", metrics) - trainer.save_state() - - # Evaluation - if training_args.do_eval: - logger.info("*** Evaluate ***") - metrics = trainer.evaluate() - - max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) - metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) - - trainer.log_metrics("eval", metrics) - trainer.save_metrics("eval", metrics) - - # Prediction - if training_args.do_predict: - logger.info("*** Predict ***") - results = trainer.predict(predict_dataset, predict_examples) - metrics = results.metrics - - max_predict_samples = ( - data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset) - ) - metrics["predict_samples"] = min(max_predict_samples, len(predict_dataset)) - - trainer.log_metrics("predict", metrics) - trainer.save_metrics("predict", metrics) - - if training_args.push_to_hub: - kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "question-answering"} - if data_args.dataset_name is not None: - kwargs["dataset_tags"] = data_args.dataset_name - if data_args.dataset_config_name is not None: - kwargs["dataset_args"] = data_args.dataset_config_name - kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" - else: - kwargs["dataset"] = data_args.dataset_name - - trainer.push_to_hub(**kwargs) - - -def _mp_fn(index): - # For xla_spawn (TPUs) - main() - - -if __name__ == "__main__": - main() +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2020 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning the library models for question answering. +""" +# You can also adapt this script on your own question answering task. Pointers for this are left as comments. 
+ +import logging +import os +import sys +from dataclasses import dataclass, field +from typing import Optional +import timeit + +import datasets +from datasets import load_dataset, load_metric + +import transformers +from trainer_qa import QuestionAnsweringTrainer +from transformers import ( + AutoConfig, + AutoModelForQuestionAnswering, + AutoTokenizer, + DataCollatorWithPadding, + EvalPrediction, + HfArgumentParser, + PreTrainedTokenizerFast, + TrainingArguments, + default_data_collator, + set_seed, +) +from transformers.trainer_utils import get_last_checkpoint +from transformers.utils import check_min_version +from transformers.utils.versions import require_version +from utils_qa import postprocess_qa_predictions + + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.10.0") + +require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt") + +logger = logging.getLogger(__name__) + + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. + """ + + model_name_or_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models"} + ) + config_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained config name or path if not the same as model_name"} + ) + tokenizer_name: Optional[str] = field( + default=None, metadata={"help": "Pretrained tokenizer name or path if not the same as model_name"} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Path to directory to store the pretrained models downloaded from huggingface.co"}, + ) + model_revision: str = field( + default="main", + metadata={"help": "The specific model version to use (can be a branch name, tag name or commit id)."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + tune: bool = field( + default=False, + metadata={"help": "Whether or not to apply quantization."}, + ) + int8: bool = field( + default=False, metadata={"help": "use int8 model to get accuracy or benchmark"} + ) + performance: bool = field( + default=False, metadata={"help": "get benchmark instead of accuracy"} + ) + accuracy: bool = field( + default=False, metadata={"help": "get accuracy"} + ) + iters: int = field( + default=100, + metadata={ + "help": "The inference iterations to run for benchmark." + }, + ) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: Optional[str] = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + train_file: Optional[str] = field(default=None, metadata={"help": "The input training data file (a text file)."}) + validation_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input evaluation data file to evaluate the perplexity on (a text file)."}, + ) + test_file: Optional[str] = field( + default=None, + metadata={"help": "An optional input test data file to evaluate the perplexity on (a text file)."}, + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_seq_length: int = field( + default=384, + metadata={ + "help": "The maximum total input sequence length after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + }, + ) + pad_to_max_length: bool = field( + default=True, + metadata={ + "help": "Whether to pad all samples to `max_seq_length`. " + "If False, will pad the samples dynamically when batching to the maximum length in the batch (which can " + "be faster on GPU but will be slower on TPU)." + }, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_predict_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of prediction examples to this " + "value if set." + }, + ) + version_2_with_negative: bool = field( + default=False, metadata={"help": "If true, some of the examples do not have an answer."} + ) + null_score_diff_threshold: float = field( + default=0.0, + metadata={ + "help": "The threshold used to select the null answer: if the best answer has a score that is less than " + "the score of the null answer minus this threshold, the null answer is selected for this example. " + "Only useful when `version_2_with_negative=True`." + }, + ) + doc_stride: int = field( + default=128, + metadata={"help": "When splitting up a long document into chunks, how much stride to take between chunks."}, + ) + n_best_size: int = field( + default=20, + metadata={"help": "The total number of n-best predictions to generate when looking for an answer."}, + ) + max_answer_length: int = field( + default=30, + metadata={ + "help": "The maximum length of an answer that can be generated. This is needed because the start " + "and end predictions are not conditioned on one another." 
+ }, + ) + + def __post_init__(self): + if ( + self.dataset_name is None + and self.train_file is None + and self.validation_file is None + and self.test_file is None + ): + raise ValueError("Need either a dataset name or a training/validation file/test_file.") + else: + if self.train_file is not None: + extension = self.train_file.split(".")[-1] + assert extension in ["csv", "json"], "`train_file` should be a csv or a json file." + if self.validation_file is not None: + extension = self.validation_file.split(".")[-1] + assert extension in ["csv", "json"], "`validation_file` should be a csv or a json file." + if self.test_file is not None: + extension = self.test_file.split(".")[-1] + assert extension in ["csv", "json"], "`test_file` should be a csv or a json file." + + +def main(): + # See all possible arguments in src/transformers/training_args.py + # or by passing the --help flag to this script. + # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file( + json_file=os.path.abspath(sys.argv[1]) + ) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + log_level = training_args.get_process_log_level() + logger.setLevel(log_level) + datasets.utils.logging.set_verbosity(log_level) + transformers.utils.logging.set_verbosity(log_level) + transformers.utils.logging.enable_default_handler() + transformers.utils.logging.enable_explicit_format() + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + logger.info(f"Training/evaluation parameters {training_args}") + + # Detecting last checkpoint. + last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + elif last_checkpoint is not None and training_args.resume_from_checkpoint is None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." + ) + + # Set seed before initializing model. + set_seed(training_args.seed) + + # Get the datasets: you can either provide your own CSV/JSON/TXT training and evaluation files (see below) + # or just provide the name of one of the public datasets available on the hub at https://huggingface.co/datasets/ + # (the dataset will be downloaded automatically from the datasets Hub). + # + # For CSV/JSON files, this script will use the column called 'text' or the first column if no column called + # 'text' is found. 
You can easily tweak this behavior (see below). + # + # In distributed training, the load_dataset function guarantee that only one local process can concurrently + # download the dataset. + if data_args.dataset_name is not None: + # Downloading and loading a dataset from the hub. + raw_datasets = load_dataset( + data_args.dataset_name, data_args.dataset_config_name, cache_dir=model_args.cache_dir + ) + else: + data_files = {} + if data_args.train_file is not None: + data_files["train"] = data_args.train_file + extension = data_args.train_file.split(".")[-1] + + if data_args.validation_file is not None: + data_files["validation"] = data_args.validation_file + extension = data_args.validation_file.split(".")[-1] + if data_args.test_file is not None: + data_files["test"] = data_args.test_file + extension = data_args.test_file.split(".")[-1] + raw_datasets = load_dataset(extension, data_files=data_files, field="data", cache_dir=model_args.cache_dir) + # See more about loading any type of standard or custom dataset (from files, python dict, pandas DataFrame, etc) at + # https://huggingface.co/docs/datasets/loading_datasets.html. + + # Load pretrained model and tokenizer + # + # Distributed training: + # The .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. + config = AutoConfig.from_pretrained( + model_args.config_name if model_args.config_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + tokenizer = AutoTokenizer.from_pretrained( + model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path, + cache_dir=model_args.cache_dir, + use_fast=True, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + model = AutoModelForQuestionAnswering.from_pretrained( + model_args.model_name_or_path, + from_tf=bool(".ckpt" in model_args.model_name_or_path), + config=config, + cache_dir=model_args.cache_dir, + revision=model_args.model_revision, + use_auth_token=True if model_args.use_auth_token else None, + ) + + # Tokenizer check: this script requires a fast tokenizer. + if not isinstance(tokenizer, PreTrainedTokenizerFast): + raise ValueError( + "This example script only works for models that have a fast tokenizer. Checkout the big table of models " + "at https://huggingface.co/transformers/index.html#supported-frameworks to find the model types that meet this " + "requirement" + ) + + # Preprocessing the datasets. + # Preprocessing is slightly different for training and evaluation. + if training_args.do_train: + column_names = raw_datasets["train"].column_names + elif training_args.do_eval: + column_names = raw_datasets["validation"].column_names + else: + column_names = raw_datasets["test"].column_names + question_column_name = "question" if "question" in column_names else column_names[0] + context_column_name = "context" if "context" in column_names else column_names[1] + answer_column_name = "answers" if "answers" in column_names else column_names[2] + + # Padding side determines if we do (question|context) or (context|question). + pad_on_right = tokenizer.padding_side == "right" + + if data_args.max_seq_length > tokenizer.model_max_length: + logger.warning( + f"The max_seq_length passed ({data_args.max_seq_length}) is larger than the maximum length for the" + f"model ({tokenizer.model_max_length}). 
Using max_seq_length={tokenizer.model_max_length}." + ) + max_seq_length = min(data_args.max_seq_length, tokenizer.model_max_length) + + # Training preprocessing + def prepare_train_features(examples): + # Some of the questions have lots of whitespace on the left, which is not useful and will make the + # truncation of the context fail (the tokenized question will take a lots of space). So we remove that + # left whitespace + examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] + + # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results + # in one example possible giving several features when a context is long, each of those features having a + # context that overlaps a bit the context of the previous feature. + tokenized_examples = tokenizer( + examples[question_column_name if pad_on_right else context_column_name], + examples[context_column_name if pad_on_right else question_column_name], + truncation="only_second" if pad_on_right else "only_first", + max_length=max_seq_length, + stride=data_args.doc_stride, + return_overflowing_tokens=True, + return_offsets_mapping=True, + padding="max_length" if data_args.pad_to_max_length else False, + ) + + # Since one example might give us several features if it has a long context, we need a map from a feature to + # its corresponding example. This key gives us just that. + sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + # The offset mappings will give us a map from token to character position in the original context. This will + # help us compute the start_positions and end_positions. + offset_mapping = tokenized_examples.pop("offset_mapping") + + # Let's label those examples! + tokenized_examples["start_positions"] = [] + tokenized_examples["end_positions"] = [] + + for i, offsets in enumerate(offset_mapping): + # We will label impossible answers with the index of the CLS token. + input_ids = tokenized_examples["input_ids"][i] + cls_index = input_ids.index(tokenizer.cls_token_id) + + # Grab the sequence corresponding to that example (to know what is the context and what is the question). + sequence_ids = tokenized_examples.sequence_ids(i) + + # One example can give several spans, this is the index of the example containing this span of text. + sample_index = sample_mapping[i] + answers = examples[answer_column_name][sample_index] + # If no answers are given, set the cls_index as answer. + if len(answers["answer_start"]) == 0: + tokenized_examples["start_positions"].append(cls_index) + tokenized_examples["end_positions"].append(cls_index) + else: + # Start/end character index of the answer in the text. + start_char = answers["answer_start"][0] + end_char = start_char + len(answers["text"][0]) + + # Start token index of the current span in the text. + token_start_index = 0 + while sequence_ids[token_start_index] != (1 if pad_on_right else 0): + token_start_index += 1 + + # End token index of the current span in the text. + token_end_index = len(input_ids) - 1 + while sequence_ids[token_end_index] != (1 if pad_on_right else 0): + token_end_index -= 1 + + # Detect if the answer is out of the span (in which case this feature is labeled with the CLS index). 
+ if not (offsets[token_start_index][0] <= start_char and offsets[token_end_index][1] >= end_char): + tokenized_examples["start_positions"].append(cls_index) + tokenized_examples["end_positions"].append(cls_index) + else: + # Otherwise move the token_start_index and token_end_index to the two ends of the answer. + # Note: we could go after the last offset if the answer is the last word (edge case). + while token_start_index < len(offsets) and offsets[token_start_index][0] <= start_char: + token_start_index += 1 + tokenized_examples["start_positions"].append(token_start_index - 1) + while offsets[token_end_index][1] >= end_char: + token_end_index -= 1 + tokenized_examples["end_positions"].append(token_end_index + 1) + + return tokenized_examples + + if training_args.do_train: + if "train" not in raw_datasets: + raise ValueError("--do_train requires a train dataset") + train_dataset = raw_datasets["train"] + if data_args.max_train_samples is not None: + # We will select sample from whole data if argument is specified + train_dataset = train_dataset.select(range(data_args.max_train_samples)) + # Create train feature from dataset + with training_args.main_process_first(desc="train dataset map pre-processing"): + train_dataset = train_dataset.map( + prepare_train_features, + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on train dataset", + ) + if data_args.max_train_samples is not None: + # Number of samples might increase during Feature Creation, We select only specified max samples + train_dataset = train_dataset.select(range(data_args.max_train_samples)) + + # Validation preprocessing + def prepare_validation_features(examples): + # Some of the questions have lots of whitespace on the left, which is not useful and will make the + # truncation of the context fail (the tokenized question will take a lots of space). So we remove that + # left whitespace + examples[question_column_name] = [q.lstrip() for q in examples[question_column_name]] + + # Tokenize our examples with truncation and maybe padding, but keep the overflows using a stride. This results + # in one example possible giving several features when a context is long, each of those features having a + # context that overlaps a bit the context of the previous feature. + tokenized_examples = tokenizer( + examples[question_column_name if pad_on_right else context_column_name], + examples[context_column_name if pad_on_right else question_column_name], + truncation="only_second" if pad_on_right else "only_first", + max_length=max_seq_length, + stride=data_args.doc_stride, + return_overflowing_tokens=True, + return_offsets_mapping=True, + padding="max_length" if data_args.pad_to_max_length else False, + ) + + # Since one example might give us several features if it has a long context, we need a map from a feature to + # its corresponding example. This key gives us just that. + sample_mapping = tokenized_examples.pop("overflow_to_sample_mapping") + + # For evaluation, we will need to convert our predictions to substrings of the context, so we keep the + # corresponding example_id and we will store the offset mappings. + tokenized_examples["example_id"] = [] + + for i in range(len(tokenized_examples["input_ids"])): + # Grab the sequence corresponding to that example (to know what is the context and what is the question). 
+ sequence_ids = tokenized_examples.sequence_ids(i) + context_index = 1 if pad_on_right else 0 + + # One example can give several spans, this is the index of the example containing this span of text. + sample_index = sample_mapping[i] + tokenized_examples["example_id"].append(examples["id"][sample_index]) + + # Set to None the offset_mapping that are not part of the context so it's easy to determine if a token + # position is part of the context or not. + tokenized_examples["offset_mapping"][i] = [ + (o if sequence_ids[k] == context_index else None) + for k, o in enumerate(tokenized_examples["offset_mapping"][i]) + ] + + return tokenized_examples + + if training_args.do_eval: + if "validation" not in raw_datasets: + raise ValueError("--do_eval requires a validation dataset") + eval_examples = raw_datasets["validation"] + if data_args.max_eval_samples is not None: + # We will select sample from whole data + eval_examples = eval_examples.select(range(data_args.max_eval_samples)) + # Validation Feature Creation + with training_args.main_process_first(desc="validation dataset map pre-processing"): + eval_dataset = eval_examples.map( + prepare_validation_features, + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on validation dataset", + ) + if data_args.max_eval_samples is not None: + # During Feature creation dataset samples might increase, we will select required samples again + eval_dataset = eval_dataset.select(range(data_args.max_eval_samples)) + if training_args.dataloader_drop_last: + data_lens = len(eval_dataset) + data_lens -= data_lens % training_args.per_device_eval_batch_size + eval_dataset = eval_dataset.select(range(data_lens)) + + if training_args.do_predict: + if "test" not in raw_datasets: + raise ValueError("--do_predict requires a test dataset") + predict_examples = raw_datasets["test"] + if data_args.max_predict_samples is not None: + # We will select sample from whole data + predict_examples = predict_examples.select(range(data_args.max_predict_samples)) + # Predict Feature Creation + with training_args.main_process_first(desc="prediction dataset map pre-processing"): + predict_dataset = predict_examples.map( + prepare_validation_features, + batched=True, + num_proc=data_args.preprocessing_num_workers, + remove_columns=column_names, + load_from_cache_file=not data_args.overwrite_cache, + desc="Running tokenizer on prediction dataset", + ) + if data_args.max_predict_samples is not None: + # During Feature creation dataset samples might increase, we will select required samples again + predict_dataset = predict_dataset.select(range(data_args.max_predict_samples)) + + # Data collator + # We have already padded to max length if the corresponding flag is True, otherwise we need to pad in the data + # collator. + data_collator = ( + default_data_collator + if data_args.pad_to_max_length + else DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None) + ) + + # Post-processing: + def post_processing_function(examples, features, predictions, stage="eval"): + # Post-processing: we match the start logits and end logits to answers in the original context. 
+ predictions = postprocess_qa_predictions( + examples=examples, + features=features, + predictions=predictions, + version_2_with_negative=data_args.version_2_with_negative, + n_best_size=data_args.n_best_size, + max_answer_length=data_args.max_answer_length, + null_score_diff_threshold=data_args.null_score_diff_threshold, + output_dir=training_args.output_dir, + log_level=log_level, + prefix=stage, + ) + # Format the result to the format the metric expects. + if data_args.version_2_with_negative: + formatted_predictions = [ + {"id": k, "prediction_text": v, "no_answer_probability": 0.0} for k, v in predictions.items() + ] + else: + formatted_predictions = [{"id": k, "prediction_text": v} for k, v in predictions.items()] + + references = [{"id": ex["id"], "answers": ex[answer_column_name]} for ex in examples] + return EvalPrediction(predictions=formatted_predictions, label_ids=references) + + metric = load_metric("squad_v2" if data_args.version_2_with_negative else "squad") + + def compute_metrics(p: EvalPrediction): + return metric.compute(predictions=p.predictions, references=p.label_ids) + + # Initialize our Trainer + trainer = QuestionAnsweringTrainer( + model=model, + args=training_args, + train_dataset=train_dataset if training_args.do_train else None, + eval_dataset=eval_dataset if training_args.do_eval else None, + eval_examples=eval_examples if training_args.do_eval else None, + tokenizer=tokenizer, + data_collator=data_collator, + post_process_function=post_processing_function, + compute_metrics=compute_metrics, + ) + + eval_dataloader = trainer.get_eval_dataloader() + # transformer issue #1 + # for transformers 4.31.0: accelerate dataloader + # *** ValueError: batch_size attribute should not be set + # after DataLoaderShard is initialized + if eval_dataloader.batch_size is None: + def _build_inc_dataloader(dataloader): + class INCDataLoader: + __iter__ = dataloader.__iter__ + def __init__(self) -> None: + self.dataloader = dataloader + self.batch_size = dataloader.total_batch_size + return INCDataLoader() + eval_dataloader = _build_inc_dataloader(eval_dataloader) + batch_size = eval_dataloader.batch_size + metric_name = "eval_f1" + + def take_eval_steps(model, trainer, metric_name, save_metrics=False): + + trainer.model = model + start_time = timeit.default_timer() + metrics = trainer.evaluate() + evalTime = timeit.default_timer() - start_time + max_eval_samples = data_args.max_eval_samples \ + if data_args.max_eval_samples is not None else len(eval_dataset) + eval_samples = min(max_eval_samples, len(eval_dataset)) + samples = eval_samples - (eval_samples % batch_size) \ + if training_args.dataloader_drop_last else eval_samples + if save_metrics: + trainer.save_metrics("eval", metrics) + logger.info("metrics keys: {}".format(metrics.keys())) + print('Batch size = %d' % batch_size) + print("Finally Eval {} Accuracy: {}".format(metric_name, metrics.get(metric_name))) + print("Latency: %.3f ms" % (evalTime / samples * 1000)) + print("Throughput: {} samples/sec".format(samples / evalTime)) + return metrics.get(metric_name) + + def eval_func(model): + return take_eval_steps(model, trainer, metric_name) + + if model_args.tune: + + if not training_args.do_eval: + raise ValueError("do_eval must be set to True for quantization.") + + from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor import quantization + conf = PostTrainingQuantConfig() + q_model = quantization.fit(model, + conf, + calib_dataloader=eval_dataloader, + eval_func=eval_func) + 
q_model.save(training_args.output_dir) + return + + if model_args.int8: + from neural_compressor.utils.pytorch import load + model = load(training_args.output_dir, model, dataloader=eval_dataloader) + if model_args.performance or model_args.accuracy: + if model_args.performance: + from neural_compressor.config import BenchmarkConfig + from neural_compressor import benchmark + b_conf = BenchmarkConfig(warmup=5, + iteration=model_args.iters, + cores_per_instance=4, + num_of_instance=1) + benchmark.fit(model, b_conf, b_dataloader=eval_dataloader) + else: + eval_func(model) + return + + # Training + if training_args.do_train: + checkpoint = None + if training_args.resume_from_checkpoint is not None: + checkpoint = training_args.resume_from_checkpoint + elif last_checkpoint is not None: + checkpoint = last_checkpoint + train_result = trainer.train(resume_from_checkpoint=checkpoint) + trainer.save_model() # Saves the tokenizer too for easy upload + + metrics = train_result.metrics + max_train_samples = ( + data_args.max_train_samples if data_args.max_train_samples is not None else len(train_dataset) + ) + metrics["train_samples"] = min(max_train_samples, len(train_dataset)) + + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + # Evaluation + if training_args.do_eval: + logger.info("*** Evaluate ***") + metrics = trainer.evaluate() + + max_eval_samples = data_args.max_eval_samples if data_args.max_eval_samples is not None else len(eval_dataset) + metrics["eval_samples"] = min(max_eval_samples, len(eval_dataset)) + + trainer.log_metrics("eval", metrics) + trainer.save_metrics("eval", metrics) + + # Prediction + if training_args.do_predict: + logger.info("*** Predict ***") + results = trainer.predict(predict_dataset, predict_examples) + metrics = results.metrics + + max_predict_samples = ( + data_args.max_predict_samples if data_args.max_predict_samples is not None else len(predict_dataset) + ) + metrics["predict_samples"] = min(max_predict_samples, len(predict_dataset)) + + trainer.log_metrics("predict", metrics) + trainer.save_metrics("predict", metrics) + + if training_args.push_to_hub: + kwargs = {"finetuned_from": model_args.model_name_or_path, "tasks": "question-answering"} + if data_args.dataset_name is not None: + kwargs["dataset_tags"] = data_args.dataset_name + if data_args.dataset_config_name is not None: + kwargs["dataset_args"] = data_args.dataset_config_name + kwargs["dataset"] = f"{data_args.dataset_name} {data_args.dataset_config_name}" + else: + kwargs["dataset"] = data_args.dataset_name + + trainer.push_to_hub(**kwargs) + + +def _mp_fn(index): + # For xla_spawn (TPUs) + main() + + +if __name__ == "__main__": + main() diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_quant.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/trainer_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/trainer_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/mixed_precision/ipex/trainer_qa.py 
rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/trainer_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/utils_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/utils_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/utils_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/utils_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/README.md diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_qa.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_quant.sh b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/trainer_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/trainer_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/fx/trainer_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/trainer_qa.py 
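The run_qa.py example relocated above wires Intel® Neural Compressor into the HuggingFace QA Trainer. Below is a minimal sketch of the same INC 2.x post-training quantization flow the script follows (PostTrainingQuantConfig → quantization.fit → save, then load for int8 inference); the toy model, dataloader, and constant eval_func are placeholders standing in for the HuggingFace model, the Trainer's eval dataloader, and the F1-based eval_func, so treat this as an illustration under those assumptions rather than part of the diff.

```python
# Minimal sketch of the PTQ flow used by run_qa.py above.
# The toy model, dataloader, and constant eval_func are placeholders for the
# HuggingFace model, the Trainer's eval dataloader, and the F1-based eval_func.
import torch
from torch.utils.data import DataLoader, TensorDataset

from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig
from neural_compressor.utils.pytorch import load

model = torch.nn.Sequential(torch.nn.Linear(16, 16), torch.nn.ReLU(), torch.nn.Linear(16, 2))
calib_loader = DataLoader(
    TensorDataset(torch.randn(64, 16), torch.zeros(64, dtype=torch.long)),
    batch_size=8,
)

def eval_func(m):
    # Stand-in for take_eval_steps(); run_qa.py returns the eval_f1 metric here.
    return 1.0

conf = PostTrainingQuantConfig()  # default static post-training quantization
q_model = quantization.fit(model, conf, calib_dataloader=calib_loader, eval_func=eval_func)
q_model.save("saved_results")

# Later (the --int8 path in the script): reload the quantized model for benchmark/accuracy runs.
int8_model = load("saved_results", model, dataloader=calib_loader)
```

The script's --performance path follows the same pattern, handing the reloaded model and the eval dataloader to BenchmarkConfig/benchmark.fit.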
diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/utils_qa.py b/examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/utils_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/utils_qa.py rename to examples/deprecated/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/utils_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/README.md similarity index 96% rename from examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/README.md index 035d74cbb79..31d2a65a298 100644 --- a/examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/README.md +++ b/examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/README.md @@ -76,4 +76,4 @@ quantized_model = load(tuned_checkpoint, model) ``` -------- -For more details, please refer to the [sample code](./run_summarization.py). \ No newline at end of file +For more details, please refer to the [sample code](run_summarization.py). \ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_quant.sh b/examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_summarization.py b/examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_summarization.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_summarization.py rename to examples/deprecated/pytorch/nlp/huggingface_models/summarization/quantization/ptq_dynamic/fx/run_summarization.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/distillation/eager/README.md 
b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/distillation/eager/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/distillation/eager/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/distillation/eager/README.md diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/distillation/eager/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/distillation/eager/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/distillation/eager/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/distillation/eager/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/distillation/eager/run_glue_no_trainer_distillation.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/distillation/eager/run_glue_no_trainer_distillation.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/distillation/eager/run_glue_no_trainer_distillation.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/distillation/eager/run_glue_no_trainer_distillation.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/export/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/export/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/README.md diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/export/fx/onnx_evaluation.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/onnx_evaluation.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/export/fx/onnx_evaluation.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/onnx_evaluation.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/export/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/export/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/export/fx/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/export/fx/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/export/fx/run_export.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/run_export.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/export/fx/run_export.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/run_export.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/export/fx/run_glue.py 
b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/run_glue.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/export/fx/run_glue.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/export/fx/run_glue.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/README.md diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/run_glue_no_trainer.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/run_glue_no_trainer.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/run_glue_no_trainer.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/distillation_for_quantization/fx/run_glue_no_trainer.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/README.md diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/run_glue_no_trainer_pruneOFA.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/run_glue_no_trainer_pruneOFA.py similarity index 
100% rename from examples/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/run_glue_no_trainer_pruneOFA.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/optimization_pipeline/prune_once_for_all/fx/run_glue_no_trainer_pruneOFA.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/README.md diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/requirements.txt similarity index 89% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/requirements.txt index 5f658bdf9f6..6e46960f4c2 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/requirements.txt +++ b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/requirements.txt @@ -1,11 +1,11 @@ -accelerate -datasets -sentencepiece -scipy -scikit-learn -protobuf -torch -evaluate -transformers -tqdm - +accelerate +datasets +sentencepiece +scipy +scikit-learn +protobuf +torch +evaluate +transformers +tqdm + diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/run_glue_no_trainer_mixed.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_2in4.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_2in4.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_2in4.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_2in4.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_4x1.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_4x1.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_4x1.sh rename to 
examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_4x1.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_dense_finetune.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_dense_finetune.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_dense_finetune.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_dense_finetune.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_mixed.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_mixed.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_mixed.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_mixed.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_perchannel.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_perchannel.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_perchannel.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_mrpc_perchannel.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_2in4.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_2in4.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_2in4.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_2in4.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_4x1.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_4x1.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_4x1.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_4x1.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_dense_fintune.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_dense_fintune.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_dense_fintune.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_dense_fintune.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_perchannel.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_perchannel.sh similarity index 100% rename from 
examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_perchannel.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/bertmini_sst2_perchannel.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/distilbert_mrpc_2in4.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/distilbert_mrpc_2in4.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/distilbert_mrpc_2in4.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/distilbert_mrpc_2in4.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/distilbert_mrpc_4x1.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/distilbert_mrpc_4x1.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/distilbert_mrpc_4x1.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager/scripts/distilbert_mrpc_4x1.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/README.md diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/requirements.txt similarity index 89% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/requirements.txt index af176cfbaec..86086f4a36d 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/requirements.txt +++ b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/requirements.txt @@ -1,12 +1,12 @@ -accelerate -datasets -sentencepiece -scipy -scikit-learn -protobuf -torch -evaluate -transformers -tqdm -xgboost - +accelerate +datasets +sentencepiece +scipy +scikit-learn +protobuf +torch +evaluate +transformers +tqdm +xgboost + diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/run_glue_no_trainer.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/run_glue_no_trainer.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/run_glue_no_trainer.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/hpo/run_glue_no_trainer.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/README.md diff --git 
a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/requirements.txt similarity index 94% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/requirements.txt index 4f8acdcd5f8..a3c0ccd0eb2 100644 --- a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/requirements.txt +++ b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/requirements.txt @@ -1,9 +1,9 @@ -datasets>=1.1.3 -sentencepiece!=0.1.92 -protobuf<=3.20.3 -scipy -scikit-learn -Keras-Preprocessing -transformers>=4.16.0 -accelerate +datasets>=1.1.3 +sentencepiece!=0.1.92 +protobuf<=3.20.3 +scipy +scikit-learn +Keras-Preprocessing +transformers>=4.16.0 +accelerate torch>=1.9.0 \ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_glue.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_glue.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_glue.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_glue.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_quant.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_dynamic/fx/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/README.md diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/requirements.txt diff --git 
a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_distributed_tuning.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_distributed_tuning.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_distributed_tuning.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_distributed_tuning.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_glue.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_quant.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/ptq_static/fx/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/README.md diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue.py 
b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_glue.py diff --git a/examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_quant.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-classification/quantization/qat/fx/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/inference/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/inference/README.md similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/inference/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/inference/README.md diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/inference/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/inference/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/inference/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/inference/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/inference/run_sd.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/inference/run_sd.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/inference/run_sd.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/inference/run_sd.py diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/README.md similarity index 97% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/README.md index 1033395407f..6b55768ef74 100644 --- a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/README.md +++ b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/README.md @@ -1,75 +1,75 @@ -Step-by-Step -============ -This document describes the step-by-step instructions for reproducing stable diffusion tuning results with Intel® Neural Compressor. - -The script ```run_diffusion.py``` is based on [huggingface/diffusers](https://github.com/huggingface/diffusers/tree/main/examples/text_to_image) and provides post-training static quantization approach based on Intel® Neural Compressor. - -# Prerequisite - -## 1. Environment -``` -pip install -r requirements.txt -``` -> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). - -## 2. Prepare Datasets -### Metric and Ground Truth Image -FID metric is used to evaluate the model in this case, so we should download [training datasets](https://huggingface.co/datasets/lambdalabs/pokemon-blip-captions) and choose one image to a directory(like "base_images"). 
->**Note:** In this case we used picture: [Ground_Truth_Image](https://datasets-server.huggingface.co/assets/lambdalabs/pokemon-blip-captions/--/lambdalabs--pokemon-blip-captions/train/14/image/image.jpg). - -# Quantization - -```bash -python run_diffusion.py \ - --model_name_or_path lambdalabs/sd-pokemon-diffusers \ - --tune \ - --quantization_approach static \ - --perf_tol 0.02 \ - --output_dir /tmp/diffusion_output \ - --base_images base_images \ - --input_text "a drawing of a gray and black dragon" \ - --calib_text "a drawing of a green pokemon with red eyes" -``` - - -## The ground truth images and before and after quantization images show below: - -The ground truth image: - -
- -
 - -The image generated by the original model (FID with ground truth: 333): - -
- -
 - -The image generated by the quantized UNet (FID with ground truth: 246): - -
- -
- - -# Performance -Original model -```bash -python run_diffusion.py \ - --model_name_or_path lambdalabs/sd-pokemon-diffusers \ - --output_dir /tmp/diffusion_output \ - --base_images base_images \ - --benchmark -``` -The model of quantized UNet -```bash -python run_diffusion.py \ - --model_name_or_path lambdalabs/sd-pokemon-diffusers \ - --output_dir /tmp/diffusion_output \ - --base_images base_images \ - --benchmark \ - --int8 -``` - ->**Note:** Inference performance speedup with Intel DL Boost (VNNI) on Intel(R) Xeon(R) hardware, Please refer to [Performance Tuning Guide](https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html) for more optimizations. +Step-by-Step +============ +This document describes the step-by-step instructions for reproducing stable diffusion tuning results with Intel® Neural Compressor. + +The script ```run_diffusion.py``` is based on [huggingface/diffusers](https://github.com/huggingface/diffusers/tree/main/examples/text_to_image) and provides post-training static quantization approach based on Intel® Neural Compressor. + +# Prerequisite + +## 1. Environment +``` +pip install -r requirements.txt +``` +> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). + +## 2. Prepare Datasets +### Metric and Ground Truth Image +FID metric is used to evaluate the model in this case, so we should download [training datasets](https://huggingface.co/datasets/lambdalabs/pokemon-blip-captions) and choose one image to a directory(like "base_images"). +>**Note:** In this case we used picture: [Ground_Truth_Image](https://datasets-server.huggingface.co/assets/lambdalabs/pokemon-blip-captions/--/lambdalabs--pokemon-blip-captions/train/14/image/image.jpg). + +# Quantization + +```bash +python run_diffusion.py \ + --model_name_or_path lambdalabs/sd-pokemon-diffusers \ + --tune \ + --quantization_approach static \ + --perf_tol 0.02 \ + --output_dir /tmp/diffusion_output \ + --base_images base_images \ + --input_text "a drawing of a gray and black dragon" \ + --calib_text "a drawing of a green pokemon with red eyes" +``` + + +## The ground truth images and before and after quantization images show below: + +The ground truth image: + +
+ +
 + +The image generated by the original model (FID with ground truth: 333): + +
+ +
 + +The image generated by the quantized UNet (FID with ground truth: 246): + +
+ +
+ + +# Performance +Original model +```bash +python run_diffusion.py \ + --model_name_or_path lambdalabs/sd-pokemon-diffusers \ + --output_dir /tmp/diffusion_output \ + --base_images base_images \ + --benchmark +``` +The model of quantized UNet +```bash +python run_diffusion.py \ + --model_name_or_path lambdalabs/sd-pokemon-diffusers \ + --output_dir /tmp/diffusion_output \ + --base_images base_images \ + --benchmark \ + --int8 +``` + +>**Note:** Inference performance speedup with Intel DL Boost (VNNI) on Intel(R) Xeon(R) hardware, Please refer to [Performance Tuning Guide](https://intel.github.io/intel-extension-for-pytorch/cpu/latest/tutorials/performance_tuning/tuning_guide.html) for more optimizations. diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/base_images/image.jpg b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/base_images/image.jpg similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/base_images/image.jpg rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/base_images/image.jpg diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/images/fp32.png b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/images/fp32.png similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/images/fp32.png rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/images/fp32.png diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/images/ground_truth.jpg b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/images/ground_truth.jpg similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/images/ground_truth.jpg rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/images/ground_truth.jpg diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/images/int8.png b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/images/int8.png similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/images/int8.png rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/images/int8.png diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/run_benchmark.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/run_quant.sh 
b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/ptq_static/fx/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py similarity index 97% rename from examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py rename to examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py index 1dd38d7a115..b49f8a98546 100644 --- a/examples/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py +++ b/examples/deprecated/pytorch/nlp/huggingface_models/text-to-image/quantization/run_diffusion.py @@ -1,337 +1,337 @@ -#!/usr/bin/env python -# coding=utf-8 -# Copyright 2021 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" Example for stable-diffusion to generate a picture from a text .""" -# You can also adapt this script on your own text to image task. Pointers for this are left as comments. 
- -import argparse -import logging -import math -import os -import sys -import time - -import torch -from PIL import Image -from torch.utils.data import DataLoader, Dataset - -from accelerate.utils import set_seed -from diffusers import StableDiffusionPipeline -from pytorch_fid import fid_score - - -os.environ["CUDA_VISIBLE_DEVICES"] = "" - - -logger = logging.getLogger(__name__) - - -def parse_args(): - """Parse arguments""" - parser = argparse.ArgumentParser(description="Example of a post-training quantization script.") - parser.add_argument( - "--model_name_or_path", - type=str, - required=True, - help="Path to pretrained model or model identifier from huggingface.co/models.", - ) - parser.add_argument( - "--input_text", - type=str, - default="a drawing of a gray and black dragon", - help="The input of the model, like: 'a photo of an astronaut riding a horse on mars'.", - ) - parser.add_argument( - "--calib_text", - type=str, - default="Womens Princess Little Deer Native American Costume", - help="The calibration data of the model, like: 'Womens Princess Little Deer Native American Costume'.", - ) - parser.add_argument( - "--output_dir", - type=str, - default="saved_results", - help="The path to save model and quantization configures.", - ) - parser.add_argument( - "--num_images_per_prompt", - type=int, - default=1, - help="The number of images to generate per prompt, defaults to 1", - ) - parser.add_argument( - "--seed", - type=int, - default=666, - help="random seed", - ) - parser.add_argument( - "--local_rank", - type=int, - default=-1, - help="For distributed: local_rank", - ) - parser.add_argument( - "--base_images", - type=str, - default="base_images", - help="Path to training images for FID input.", - ) - parser.add_argument( - "--tune", - action="store_true", - help="Whether or not to apply quantization.", - ) - parser.add_argument( - "--quantization_approach", - type=str, - default="static", - help="Quantization approach. Supported approach are static, " - "dynamic and auto.", - ) - parser.add_argument( - "--framework", - type=str, - default="pytorch", - help="Deep learning framework. 
Supported framework are pytorch, ipex", - ) - parser.add_argument( - "--metric_name", - type=str, - default="eval_f1", - help="Metric used for the tuning strategy.", - ) - parser.add_argument( - "--is_relative", - type=bool, - default="True", - help="Metric tolerance mode, True for relative, otherwise for absolute.", - ) - parser.add_argument( - "--perf_tol", - type=float, - default=0.01, - help="Performance tolerance when optimizing the model.", - ) - parser.add_argument( - "--benchmark", - action="store_true", - help="Only test performance for model.", - ) - parser.add_argument( - "--accuracy_only", - action="store_true", - help="Only test accuracy for model.", - ) - parser.add_argument( - "--int8", - action="store_true", - help="benchmark for int8 model", - ) - - args = parser.parse_args() - env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) - if env_local_rank != -1 and env_local_rank != args.local_rank: - args.local_rank = env_local_rank - - return args - -def image_grid(imgs, rows, cols): - if not len(imgs) == rows * cols: - raise ValueError("The specified number of rows and columns are not correct.") - - w, h = imgs[0].size - grid = Image.new("RGB", size=(cols * w, rows * h)) - grid_w, grid_h = grid.size - - for i, img in enumerate(imgs): - grid.paste(img, box=(i % cols * w, i // cols * h)) - return grid - - -def benchmark(pipe, generator): - warmup = 2 - total = 5 - total_time = 0 - with torch.no_grad(): - for i in range(total): - prompt = "a photo of an astronaut riding a horse on mars" - start2 = time.time() - images = pipe(prompt, guidance_scale=7.5, num_inference_steps=50, generator=generator).images - end2 = time.time() - if i >= warmup: - total_time += end2 - start2 - print("Average Latency: ", (total_time) / (total - warmup), "s") - print("Average Throughput: {:.5f} samples/sec".format((total - warmup) / (total_time))) - - -def accuracy(pipe, generator, rows, args): - with torch.no_grad(): - new_images = pipe( - args.input_text, - guidance_scale=7.5, - num_inference_steps=50, - generator=generator, - num_images_per_prompt=args.num_images_per_prompt, - ).images - tmp_save_images = "tmp_save_images" - os.makedirs(tmp_save_images, exist_ok=True) - if os.path.isfile(os.path.join(tmp_save_images, "image.png")): - os.remove(os.path.join(tmp_save_images, "image.png")) - grid = image_grid(new_images, rows=rows, cols=args.num_images_per_prompt // rows) - grid.save(os.path.join(tmp_save_images, "image.png")) - fid = fid_score.calculate_fid_given_paths((args.base_images, tmp_save_images), 1, "cpu", 2048, 8) - print("Finally FID score Accuracy: {}".format(fid)) - return fid - -class CalibDataset(Dataset): - def __len__(self): - return 1 - - def __getitem__(self, idx): - data = "a photo of an astronaut riding a horse on mars" - return data - - -def main(): - # Passing the --help flag to this script. - - args = parse_args() - - # Setup logging - logging.basicConfig( - format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", - datefmt="%m/%d/%Y %H:%M:%S", - handlers=[logging.StreamHandler(sys.stdout)], - ) - - logger.info(f"Parameters {args}") - - # Set seed before initializing model. - set_seed(args.seed) - - # Load pretrained model and generate a pipeline - # - # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently - # download model & vocab. 
- pipe = StableDiffusionPipeline.from_pretrained(args.model_name_or_path) - _rows = int(math.sqrt(args.num_images_per_prompt)) - - if args.tune: - tmp_fp32_images = "tmp_fp32_images" - tmp_int8_images = "tmp_int8_images" - os.makedirs(tmp_fp32_images, exist_ok=True) - os.makedirs(tmp_int8_images, exist_ok=True) - generator = torch.Generator("cpu").manual_seed(args.seed) - fp32_images = pipe( - args.input_text, - guidance_scale=7.5, - num_inference_steps=50, - generator=generator, - num_images_per_prompt=args.num_images_per_prompt, - ).images - grid = image_grid(fp32_images, rows=_rows, cols=args.num_images_per_prompt // _rows) - grid.save(os.path.join(tmp_fp32_images, "fp32.png")) - - attr_list = ["unet"] - for name in attr_list: - model = getattr(pipe, name) - - def calibration_func(model): - calib_num = 5 - setattr(pipe, name, model) - with torch.no_grad(): - for i in range(calib_num): - pipe( - args.calib_text, - guidance_scale=7.5, - num_inference_steps=50, - generator=generator, - num_images_per_prompt=args.num_images_per_prompt, - ) - - def eval_func(model): - setattr(pipe, name, model) - generator = torch.Generator("cpu").manual_seed(args.seed) - with torch.no_grad(): - new_images = pipe( - args.input_text, - guidance_scale=7.5, - num_inference_steps=50, - generator=generator, - num_images_per_prompt=args.num_images_per_prompt, - ).images - if os.path.isfile(os.path.join(tmp_int8_images, "int8.png")): - os.remove(os.path.join(tmp_int8_images, "int8.png")) - grid = image_grid(new_images, rows=_rows, cols=args.num_images_per_prompt // _rows) - grid.save(os.path.join(tmp_int8_images, "int8.png")) - fid = fid_score.calculate_fid_given_paths((args.base_images, tmp_int8_images), 1, "cpu", 2048, 8) - return fid - - from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion - from neural_compressor.quantization import fit - accuracy_criterion = AccuracyCriterion( - higher_is_better=False, - criterion="relative" if args.is_relative else "absolute", - tolerable_loss=args.perf_tol) - quantization_config = PostTrainingQuantConfig( - approach=args.quantization_approach, - accuracy_criterion=accuracy_criterion - ) - model = fit( - model=pipe.unet, - conf=quantization_config, - eval_func=eval_func, - calib_func=calibration_func, - ) - setattr(pipe, name, model) - model.save(args.output_dir) - logger.info(f"Optimized model {name} saved to: {args.output_dir}.") - - if args.benchmark or args.accuracy_only: - - def b_func(model): - setattr(pipe, "unet", model) - benchmark(pipe, generator) - - if args.int8: - print("====int8 inference====") - from neural_compressor.utils.pytorch import load - checkpoint = os.path.join(args.output_dir) - model = load(checkpoint, model=getattr(pipe, "unet")) - model.eval() - else: - print("====fp32 inference====") - model = getattr(pipe, "unet") - generator = torch.Generator("cpu").manual_seed(args.seed) - if args.benchmark: - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - - b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) - fit(model, conf=b_conf, b_func=b_func) - if args.accuracy_only: - setattr(pipe, "unet", model) - accuracy(pipe, generator, _rows, args) - - -def _mp_fn(index): - # For xla_spawn (TPUs) - main() - - -if __name__ == "__main__": - main() +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2021 The HuggingFace Team. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Example for stable-diffusion to generate a picture from a text .""" +# You can also adapt this script on your own text to image task. Pointers for this are left as comments. + +import argparse +import logging +import math +import os +import sys +import time + +import torch +from PIL import Image +from torch.utils.data import DataLoader, Dataset + +from accelerate.utils import set_seed +from diffusers import StableDiffusionPipeline +from pytorch_fid import fid_score + + +os.environ["CUDA_VISIBLE_DEVICES"] = "" + + +logger = logging.getLogger(__name__) + + +def parse_args(): + """Parse arguments""" + parser = argparse.ArgumentParser(description="Example of a post-training quantization script.") + parser.add_argument( + "--model_name_or_path", + type=str, + required=True, + help="Path to pretrained model or model identifier from huggingface.co/models.", + ) + parser.add_argument( + "--input_text", + type=str, + default="a drawing of a gray and black dragon", + help="The input of the model, like: 'a photo of an astronaut riding a horse on mars'.", + ) + parser.add_argument( + "--calib_text", + type=str, + default="Womens Princess Little Deer Native American Costume", + help="The calibration data of the model, like: 'Womens Princess Little Deer Native American Costume'.", + ) + parser.add_argument( + "--output_dir", + type=str, + default="saved_results", + help="The path to save model and quantization configures.", + ) + parser.add_argument( + "--num_images_per_prompt", + type=int, + default=1, + help="The number of images to generate per prompt, defaults to 1", + ) + parser.add_argument( + "--seed", + type=int, + default=666, + help="random seed", + ) + parser.add_argument( + "--local_rank", + type=int, + default=-1, + help="For distributed: local_rank", + ) + parser.add_argument( + "--base_images", + type=str, + default="base_images", + help="Path to training images for FID input.", + ) + parser.add_argument( + "--tune", + action="store_true", + help="Whether or not to apply quantization.", + ) + parser.add_argument( + "--quantization_approach", + type=str, + default="static", + help="Quantization approach. Supported approach are static, " + "dynamic and auto.", + ) + parser.add_argument( + "--framework", + type=str, + default="pytorch", + help="Deep learning framework. 
Supported framework are pytorch, ipex", + ) + parser.add_argument( + "--metric_name", + type=str, + default="eval_f1", + help="Metric used for the tuning strategy.", + ) + parser.add_argument( + "--is_relative", + type=bool, + default="True", + help="Metric tolerance mode, True for relative, otherwise for absolute.", + ) + parser.add_argument( + "--perf_tol", + type=float, + default=0.01, + help="Performance tolerance when optimizing the model.", + ) + parser.add_argument( + "--benchmark", + action="store_true", + help="Only test performance for model.", + ) + parser.add_argument( + "--accuracy_only", + action="store_true", + help="Only test accuracy for model.", + ) + parser.add_argument( + "--int8", + action="store_true", + help="benchmark for int8 model", + ) + + args = parser.parse_args() + env_local_rank = int(os.environ.get("LOCAL_RANK", -1)) + if env_local_rank != -1 and env_local_rank != args.local_rank: + args.local_rank = env_local_rank + + return args + +def image_grid(imgs, rows, cols): + if not len(imgs) == rows * cols: + raise ValueError("The specified number of rows and columns are not correct.") + + w, h = imgs[0].size + grid = Image.new("RGB", size=(cols * w, rows * h)) + grid_w, grid_h = grid.size + + for i, img in enumerate(imgs): + grid.paste(img, box=(i % cols * w, i // cols * h)) + return grid + + +def benchmark(pipe, generator): + warmup = 2 + total = 5 + total_time = 0 + with torch.no_grad(): + for i in range(total): + prompt = "a photo of an astronaut riding a horse on mars" + start2 = time.time() + images = pipe(prompt, guidance_scale=7.5, num_inference_steps=50, generator=generator).images + end2 = time.time() + if i >= warmup: + total_time += end2 - start2 + print("Average Latency: ", (total_time) / (total - warmup), "s") + print("Average Throughput: {:.5f} samples/sec".format((total - warmup) / (total_time))) + + +def accuracy(pipe, generator, rows, args): + with torch.no_grad(): + new_images = pipe( + args.input_text, + guidance_scale=7.5, + num_inference_steps=50, + generator=generator, + num_images_per_prompt=args.num_images_per_prompt, + ).images + tmp_save_images = "tmp_save_images" + os.makedirs(tmp_save_images, exist_ok=True) + if os.path.isfile(os.path.join(tmp_save_images, "image.png")): + os.remove(os.path.join(tmp_save_images, "image.png")) + grid = image_grid(new_images, rows=rows, cols=args.num_images_per_prompt // rows) + grid.save(os.path.join(tmp_save_images, "image.png")) + fid = fid_score.calculate_fid_given_paths((args.base_images, tmp_save_images), 1, "cpu", 2048, 8) + print("Finally FID score Accuracy: {}".format(fid)) + return fid + +class CalibDataset(Dataset): + def __len__(self): + return 1 + + def __getitem__(self, idx): + data = "a photo of an astronaut riding a horse on mars" + return data + + +def main(): + # Passing the --help flag to this script. + + args = parse_args() + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + + logger.info(f"Parameters {args}") + + # Set seed before initializing model. + set_seed(args.seed) + + # Load pretrained model and generate a pipeline + # + # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently + # download model & vocab. 
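# ---------------------------------------------------------------------------
# Editor's sketch (not part of the diff): the example script being added here
# drives Intel Neural Compressor's 2.x accuracy-aware post-training
# quantization on the pipeline's UNet. The condensed flow below mirrors the
# calls used further down (AccuracyCriterion, PostTrainingQuantConfig, fit);
# `pipe` is assumed to be a loaded diffusers StableDiffusionPipeline, and the
# stub calib/eval functions stand in for the FID-based ones the script defines.
# Treat it as an illustrative summary under those assumptions, not as the
# file's actual contents.
import torch
from diffusers import StableDiffusionPipeline
from neural_compressor.config import AccuracyCriterion, PostTrainingQuantConfig
from neural_compressor.quantization import fit


def quantize_unet(pipe: StableDiffusionPipeline, output_dir: str = "saved_results"):
    """Quantize pipe.unet with accuracy-aware tuning and save the result."""

    def calib_func(unet):
        # Run a few denoising passes so the quantizer can observe activations.
        pipe.unet = unet
        with torch.no_grad():
            pipe("a photo of an astronaut riding a horse on mars",
                 guidance_scale=7.5, num_inference_steps=50)

    def eval_func(unet):
        # The real script renders images and returns an FID score against the
        # base images (lower is better); a constant stands in here to keep the
        # sketch short.
        pipe.unet = unet
        return 0.0

    criterion = AccuracyCriterion(
        higher_is_better=False,   # FID: lower is better
        criterion="relative",     # tolerable_loss is interpreted as a ratio
        tolerable_loss=0.01,
    )
    conf = PostTrainingQuantConfig(approach="static", accuracy_criterion=criterion)
    q_unet = fit(model=pipe.unet, conf=conf, eval_func=eval_func, calib_func=calib_func)
    q_unet.save(output_dir)
    return q_unet

# For later int8 benchmarking, the script reloads the saved UNet with:
#   from neural_compressor.utils.pytorch import load
#   int8_unet = load("saved_results", model=pipe.unet)
# ---------------------------------------------------------------------------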
+ pipe = StableDiffusionPipeline.from_pretrained(args.model_name_or_path) + _rows = int(math.sqrt(args.num_images_per_prompt)) + + if args.tune: + tmp_fp32_images = "tmp_fp32_images" + tmp_int8_images = "tmp_int8_images" + os.makedirs(tmp_fp32_images, exist_ok=True) + os.makedirs(tmp_int8_images, exist_ok=True) + generator = torch.Generator("cpu").manual_seed(args.seed) + fp32_images = pipe( + args.input_text, + guidance_scale=7.5, + num_inference_steps=50, + generator=generator, + num_images_per_prompt=args.num_images_per_prompt, + ).images + grid = image_grid(fp32_images, rows=_rows, cols=args.num_images_per_prompt // _rows) + grid.save(os.path.join(tmp_fp32_images, "fp32.png")) + + attr_list = ["unet"] + for name in attr_list: + model = getattr(pipe, name) + + def calibration_func(model): + calib_num = 5 + setattr(pipe, name, model) + with torch.no_grad(): + for i in range(calib_num): + pipe( + args.calib_text, + guidance_scale=7.5, + num_inference_steps=50, + generator=generator, + num_images_per_prompt=args.num_images_per_prompt, + ) + + def eval_func(model): + setattr(pipe, name, model) + generator = torch.Generator("cpu").manual_seed(args.seed) + with torch.no_grad(): + new_images = pipe( + args.input_text, + guidance_scale=7.5, + num_inference_steps=50, + generator=generator, + num_images_per_prompt=args.num_images_per_prompt, + ).images + if os.path.isfile(os.path.join(tmp_int8_images, "int8.png")): + os.remove(os.path.join(tmp_int8_images, "int8.png")) + grid = image_grid(new_images, rows=_rows, cols=args.num_images_per_prompt // _rows) + grid.save(os.path.join(tmp_int8_images, "int8.png")) + fid = fid_score.calculate_fid_given_paths((args.base_images, tmp_int8_images), 1, "cpu", 2048, 8) + return fid + + from neural_compressor.config import PostTrainingQuantConfig, AccuracyCriterion + from neural_compressor.quantization import fit + accuracy_criterion = AccuracyCriterion( + higher_is_better=False, + criterion="relative" if args.is_relative else "absolute", + tolerable_loss=args.perf_tol) + quantization_config = PostTrainingQuantConfig( + approach=args.quantization_approach, + accuracy_criterion=accuracy_criterion + ) + model = fit( + model=pipe.unet, + conf=quantization_config, + eval_func=eval_func, + calib_func=calibration_func, + ) + setattr(pipe, name, model) + model.save(args.output_dir) + logger.info(f"Optimized model {name} saved to: {args.output_dir}.") + + if args.benchmark or args.accuracy_only: + + def b_func(model): + setattr(pipe, "unet", model) + benchmark(pipe, generator) + + if args.int8: + print("====int8 inference====") + from neural_compressor.utils.pytorch import load + checkpoint = os.path.join(args.output_dir) + model = load(checkpoint, model=getattr(pipe, "unet")) + model.eval() + else: + print("====fp32 inference====") + model = getattr(pipe, "unet") + generator = torch.Generator("cpu").manual_seed(args.seed) + if args.benchmark: + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + + b_conf = BenchmarkConfig(cores_per_instance=4, num_of_instance=1) + fit(model, conf=b_conf, b_func=b_func) + if args.accuracy_only: + setattr(pipe, "unet", model) + accuracy(pipe, generator, _rows, args) + + +def _mp_fn(index): + # For xla_spawn (TPUs) + main() + + +if __name__ == "__main__": + main() diff --git a/examples/pytorch/nlp/huggingface_models/translation/pruning/eager/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/README.md similarity index 100% rename from 
examples/pytorch/nlp/huggingface_models/translation/pruning/eager/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/README.md diff --git a/examples/pytorch/nlp/huggingface_models/translation/pruning/eager/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/translation/pruning/eager/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/translation/pruning/eager/run_translation_no_trainer.py b/examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/run_translation_no_trainer.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/translation/pruning/eager/run_translation_no_trainer.py rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/run_translation_no_trainer.py diff --git a/examples/pytorch/nlp/huggingface_models/translation/pruning/eager/scripts/run_translation_finetune.sh b/examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/scripts/run_translation_finetune.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/translation/pruning/eager/scripts/run_translation_finetune.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/scripts/run_translation_finetune.sh diff --git a/examples/pytorch/nlp/huggingface_models/translation/pruning/eager/scripts/run_translation_prune.sh b/examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/scripts/run_translation_prune.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/translation/pruning/eager/scripts/run_translation_prune.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager/scripts/run_translation_prune.sh diff --git a/examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/README.md b/examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/README.md similarity index 96% rename from examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/README.md rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/README.md index 9bc50694ad7..2199c63afe3 100644 --- a/examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/README.md +++ b/examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/README.md @@ -72,4 +72,4 @@ quantized_model = load(tuned_checkpoint, model) ``` -------- -For more details, please refer to the [sample code](./run_translation.py). \ No newline at end of file +For more details, please refer to the [sample code](run_translation.py). 
\ No newline at end of file diff --git a/examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/requirements.txt b/examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/requirements.txt rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/requirements.txt diff --git a/examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_benchmark.sh b/examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_benchmark.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_benchmark.sh diff --git a/examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_quant.sh b/examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_quant.sh rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_translation.py b/examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_translation.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_translation.py rename to examples/deprecated/pytorch/nlp/huggingface_models/translation/quantization/ptq_dynamic/fx/run_translation.py diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/README.md b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/README.md similarity index 98% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/README.md rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/README.md index fdfe1282b45..52e4e0935fb 100644 --- a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/README.md +++ b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/README.md @@ -122,4 +122,4 @@ q_model = load(os.path.abspath(os.path.expanduser(args.tuned_checkpoint)), fp32_model, dataloader=your_dataloader) ``` -Please refer to [Sample code](./pytorch/tools/test_net.py) +Please refer to [Sample code](pytorch/tools/test_net.py) diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/download_dataset.sh b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/download_dataset.sh similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/download_dataset.sh rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/download_dataset.sh diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/download_weights.sh b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/download_weights.sh similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/download_weights.sh rename to 
examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/download_weights.sh diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/LICENSE b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/LICENSE similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/LICENSE rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/LICENSE diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/TROUBLESHOOTING.md b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/TROUBLESHOOTING.md similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/TROUBLESHOOTING.md rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/TROUBLESHOOTING.md diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/configs/e2e_mask_rcnn_R_50_FPN_1x.yaml diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/maskrnn.patch b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/maskrnn.patch similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/maskrnn.patch rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/maskrnn.patch diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/setup.py b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/setup.py similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/setup.py rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/setup.py diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/cityscapes/convert_cityscapes_to_coco.py b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/cityscapes/convert_cityscapes_to_coco.py similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/cityscapes/convert_cityscapes_to_coco.py rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/cityscapes/convert_cityscapes_to_coco.py diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/cityscapes/instances2dict_with_polygons.py b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/cityscapes/instances2dict_with_polygons.py similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/cityscapes/instances2dict_with_polygons.py rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/cityscapes/instances2dict_with_polygons.py diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/test_net.py 
b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/test_net.py similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/test_net.py rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/test_net.py diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/train_mlperf.py b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/train_mlperf.py similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/train_mlperf.py rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/train_mlperf.py diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/train_net.py b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/train_net.py similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/train_net.py rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/pytorch/tools/train_net.py diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/requirements.txt b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/requirements.txt similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/requirements.txt rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/requirements.txt diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_and_time.sh b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_and_time.sh similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_and_time.sh rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_and_time.sh diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_benchmark.sh b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_benchmark.sh rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_benchmark.sh diff --git a/examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_quant.sh b/examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_quant.sh similarity index 100% rename from examples/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_quant.sh rename to examples/deprecated/pytorch/object_detection/maskrcnn/quantization/ptq/fx/run_quant.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/README.md b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/README.md similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/README.md rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/README.md diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/mlperf.conf b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/mlperf.conf similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/mlperf.conf rename to 
examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/mlperf.conf diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/prepare_dataset.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/prepare_dataset.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/prepare_dataset.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/prepare_dataset.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/prepare_loadgen.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/prepare_loadgen.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/prepare_loadgen.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/prepare_loadgen.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_null.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_null.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_null.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_null.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_onnxruntime.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_onnxruntime.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_onnxruntime.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_onnxruntime.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_pytorch.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_pytorch.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_pytorch.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_pytorch.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_pytorch_native.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_pytorch_native.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_pytorch_native.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_pytorch_native.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_tf.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_tf.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_tf.py rename to 
examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_tf.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_tflite.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_tflite.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_tflite.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/backend_tflite.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/coco.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/coco.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/coco.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/coco.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/dataset.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/dataset.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/dataset.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/dataset.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/imagenet.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/imagenet.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/imagenet.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/imagenet.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py similarity index 99% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py index 67159f25a40..cc829f8640b 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py +++ b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/main.py @@ -50,7 +50,7 @@ (coco.Coco, dataset.pre_process_coco_mobilenet, coco.PostProcessCoco(), {"image_size": [300, 300, 3]}), "coco-300-pt": - (coco.Coco, dataset.pre_process_coco_pt_mobilenet, coco.PostProcessCocoPt(False,0.3), + (coco.Coco, dataset.pre_process_coco_pt_mobilenet, coco.PostProcessCocoPt(False, 0.3), {"image_size": [300, 300, 3]}), "coco-1200": (coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCoco(), @@ -59,7 +59,7 @@ (coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCocoOnnx(), {"image_size": [1200, 1200, 3]}), "coco-1200-pt": - (coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCocoPt(True,0.05), + (coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCocoPt(True, 0.05), {"image_size": [1200, 1200, 3],"use_label_map": True}), "coco-1200-tf": (coco.Coco, dataset.pre_process_coco_resnet34, coco.PostProcessCocoTf(), diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/anchor_generator.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/anchor_generator.py 
similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/anchor_generator.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/anchor_generator.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/base_model_r34.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/base_model_r34.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/base_model_r34.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/base_model_r34.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/convert_tf_weights.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/convert_tf_weights.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/convert_tf_weights.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/convert_tf_weights.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/ssd_mobilenet_v1.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/ssd_mobilenet_v1.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/ssd_mobilenet_v1.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/ssd_mobilenet_v1.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/ssd_r34.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/ssd_r34.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/ssd_r34.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/ssd_r34.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/utils.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/utils.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/utils.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/models/utils.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/pycoco.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/pycoco.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/pycoco.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/pycoco.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/version.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/version.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/version.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/version.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/requirements.txt 
b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/requirements.txt similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/requirements.txt rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/requirements.txt diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/run_benchmark.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/run_benchmark.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/run_benchmark.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/run_quant.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/run_quant.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/run_quant.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/run_quant.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/setup.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/setup.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/setup.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/setup.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/accuracy-coco.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/accuracy-coco.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/accuracy-coco.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/accuracy-coco.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/accuracy-imagenet.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/accuracy-imagenet.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/accuracy-imagenet.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/accuracy-imagenet.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/calibrate_torchvision_model.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/calibrate_torchvision_model.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/calibrate_torchvision_model.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/calibrate_torchvision_model.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/ci-run.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/ci-run.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/ci-run.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/ci-run.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/coco-analyze.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/coco-analyze.py similarity index 
100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/coco-analyze.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/coco-analyze.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/convert-to-onnx.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/convert-to-onnx.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/convert-to-onnx.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/convert-to-onnx.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/lglog2csv.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/lglog2csv.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/lglog2csv.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/lglog2csv.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/make_fake_imagenet.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/make_fake_imagenet.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/make_fake_imagenet.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/make_fake_imagenet.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/mobilenet-to-onnx.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/mobilenet-to-onnx.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/mobilenet-to-onnx.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/mobilenet-to-onnx.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/pylintrc b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/pylintrc similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/pylintrc rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/pylintrc diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/resnet50-to-tflite.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/resnet50-to-tflite.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/resnet50-to-tflite.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/resnet50-to-tflite.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/resnet_save.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/resnet_save.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/resnet_save.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/resnet_save.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/ssd-nhwc.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/ssd-nhwc.py similarity index 100% rename from 
examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/ssd-nhwc.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/tools/ssd-nhwc.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/README.md b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/README.md similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/README.md rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/README.md diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/coco.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/coco.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/coco.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/coco.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/upscale.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/upscale.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/upscale.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/upscale.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/upscale_coco.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/upscale_coco.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/upscale_coco.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/upscale_coco/upscale_coco.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/user.conf b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/user.conf similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/user.conf rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/user.conf diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/README.md b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/README.md similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/README.md rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/README.md diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/base_model.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/base_model.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/base_model.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/base_model.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/coco.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/coco.py old mode 100755 new mode 100644 similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/coco.py rename to 
examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/coco.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_dataset.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_dataset.sh similarity index 98% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_dataset.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_dataset.sh index e112564098c..e89739127bb 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_dataset.sh +++ b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_dataset.sh @@ -1,9 +1,9 @@ -#!/usr/bin/env bash -### This file is originally from: [mlcommons repo](https://github.com/mlcommons/inference/tree/r0.5/others/cloud/single_stage_detector/download_dataset.sh) -DATASET_DIR=${DATASET_DIR-$PWD} - -dir=$(pwd) -mkdir -p ${DATASET_DIR}/coco; cd ${DATASET_DIR}/coco -curl -O http://images.cocodataset.org/zips/val2017.zip; unzip val2017.zip -curl -O http://images.cocodataset.org/annotations/annotations_trainval2017.zip; unzip annotations_trainval2017.zip +#!/usr/bin/env bash +### This file is originally from: [mlcommons repo](https://github.com/mlcommons/inference/tree/r0.5/others/cloud/single_stage_detector/download_dataset.sh) +DATASET_DIR=${DATASET_DIR-$PWD} + +dir=$(pwd) +mkdir -p ${DATASET_DIR}/coco; cd ${DATASET_DIR}/coco +curl -O http://images.cocodataset.org/zips/val2017.zip; unzip val2017.zip +curl -O http://images.cocodataset.org/annotations/annotations_trainval2017.zip; unzip annotations_trainval2017.zip cd $dir \ No newline at end of file diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_model.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_model.sh similarity index 98% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_model.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_model.sh index c4ec0683d34..e559f406772 100644 --- a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_model.sh +++ b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/download_model.sh @@ -1,8 +1,8 @@ -#!/usr/bin/env bash -### This file is originally from: [mlcommons repo](https://github.com/mlcommons/inference/tree/r0.5/others/cloud/single_stage_detector/download_model.sh) -CHECKPOINT_DIR=${CHECKPOINT_DIR-$PWD} - -dir=$(pwd) -mkdir -p ${CHECKPOINT_DIR}/pretrained; cd ${CHECKPOINT_DIR}/pretrained -wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=13kWgEItsoxbVKUlkQz4ntjl1IZGk6_5Z' -O 'resnet34-ssd1200.pth' +#!/usr/bin/env bash +### This file is originally from: [mlcommons repo](https://github.com/mlcommons/inference/tree/r0.5/others/cloud/single_stage_detector/download_model.sh) +CHECKPOINT_DIR=${CHECKPOINT_DIR-$PWD} + +dir=$(pwd) +mkdir -p ${CHECKPOINT_DIR}/pretrained; cd ${CHECKPOINT_DIR}/pretrained +wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=13kWgEItsoxbVKUlkQz4ntjl1IZGk6_5Z' -O 'resnet34-ssd1200.pth' cd $dir \ No newline at end of file diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py similarity index 100% 
rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer_weight_sharing.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer_weight_sharing.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer_weight_sharing.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/infer_weight_sharing.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/oob_utils.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/oob_utils.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/oob_utils.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/oob_utils.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/requirements.txt b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/requirements.txt similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/requirements.txt rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/requirements.txt diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/run_benchmark.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/run_benchmark.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/run_benchmark.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/run_benchmark.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/run_quant.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/run_quant.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/run_quant.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/run_quant.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd300.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd300.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd300.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd300.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd_r34.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd_r34.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd_r34.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/ssd_r34.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/utils.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/utils.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/utils.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/ptq/ipex/utils.py diff --git 
a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/README.md b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/README.md similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/README.md rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/README.md diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/download_dataset.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/download_dataset.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/download_dataset.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/download_dataset.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/requirements.txt b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/requirements.txt similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/requirements.txt rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/requirements.txt diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/run_benchmark.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/run_benchmark.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/run_benchmark.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/run_quant.sh b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/run_quant.sh similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/run_quant.sh rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/run_quant.sh diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/base_model.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/base_model.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/base_model.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/base_model.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/coco.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/coco.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/coco.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/coco.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/main.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/mlperf_logger.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/mlperf_logger.py similarity index 100% rename from 
examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/mlperf_logger.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/mlperf_logger.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/ssd300.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/ssd300.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/ssd300.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/ssd300.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/utils.py b/examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/utils.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/utils.py rename to examples/deprecated/pytorch/object_detection/ssd_resnet34/quantization/qat/fx/ssd/utils.py diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/LICENSE b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/LICENSE similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/LICENSE rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/LICENSE diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/README.md b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/README.md similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/README.md rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/README.md diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/YOLOV3_README.md b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/YOLOV3_README.md similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/YOLOV3_README.md rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/YOLOV3_README.md diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/coco.data b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/coco.data similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/coco.data rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/coco.data diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/create_custom_model.sh b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/create_custom_model.sh similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/create_custom_model.sh rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/create_custom_model.sh diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/custom.data b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/custom.data similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/custom.data rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/custom.data diff 
--git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/yolov3-tiny.cfg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/yolov3-tiny.cfg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/yolov3-tiny.cfg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/yolov3-tiny.cfg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/yolov3.cfg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/yolov3.cfg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/yolov3.cfg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/config/yolov3.cfg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/coco.names b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/coco.names similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/coco.names rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/coco.names diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/classes.names b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/classes.names similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/classes.names rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/classes.names diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/images/train.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/images/train.jpg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/images/train.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/images/train.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/labels/train.txt b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/labels/train.txt similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/labels/train.txt rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/labels/train.txt diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/train.txt b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/train.txt similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/train.txt rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/train.txt diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/valid.txt b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/valid.txt similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/valid.txt rename to 
examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/custom/valid.txt diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/get_coco_dataset.sh b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/get_coco_dataset.sh similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/get_coco_dataset.sh rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/get_coco_dataset.sh diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/dog.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/dog.jpg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/dog.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/dog.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/eagle.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/eagle.jpg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/eagle.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/eagle.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/field.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/field.jpg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/field.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/field.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/giraffe.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/giraffe.jpg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/giraffe.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/giraffe.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/herd_of_horses.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/herd_of_horses.jpg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/herd_of_horses.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/herd_of_horses.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/messi.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/messi.jpg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/messi.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/messi.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/person.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/person.jpg similarity index 100% rename from 
examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/person.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/person.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/room.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/room.jpg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/room.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/room.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/street.jpg b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/street.jpg similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/street.jpg rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/data/samples/street.jpg diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/detect.py b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/detect.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/detect.py rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/detect.py diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/models.py b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/models.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/models.py rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/models.py diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/requirements.txt b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/requirements.txt similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/requirements.txt rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/requirements.txt diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/run_benchmark.sh b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/run_benchmark.sh rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/run_benchmark.sh diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/run_quant.sh b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/run_quant.sh similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/run_quant.sh rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/run_quant.sh diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/test.py b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/test.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/test.py rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/test.py diff 
--git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/train.py b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/train.py similarity index 98% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/train.py rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/train.py index 00fe3de696d..9d2d7a79e7c 100644 --- a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/train.py +++ b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/train.py @@ -5,7 +5,7 @@ from utils.utils import * from utils.datasets import * from utils.parse_config import * from test import evaluate from terminaltables import AsciiTable diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/augmentations.py b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/augmentations.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/augmentations.py rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/augmentations.py diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/datasets.py b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/datasets.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/datasets.py rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/datasets.py diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/logger.py b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/logger.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/logger.py rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/logger.py diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/parse_config.py b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/parse_config.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/parse_config.py rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/parse_config.py diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/utils.py b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/utils.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/utils.py rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/utils.py diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/weights/download_weights.sh b/examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/weights/download_weights.sh similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/weights/download_weights.sh rename to examples/deprecated/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/weights/download_weights.sh diff --git
a/examples/pytorch/object_detection/yolo_v5/pruning/eager/README.md b/examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/README.md similarity index 100% rename from examples/pytorch/object_detection/yolo_v5/pruning/eager/README.md rename to examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/README.md diff --git a/examples/pytorch/object_detection/yolo_v5/pruning/eager/coco.yaml b/examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/coco.yaml similarity index 100% rename from examples/pytorch/object_detection/yolo_v5/pruning/eager/coco.yaml rename to examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/coco.yaml diff --git a/examples/pytorch/object_detection/yolo_v5/pruning/eager/hyp.scratch-low.yaml b/examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/hyp.scratch-low.yaml similarity index 100% rename from examples/pytorch/object_detection/yolo_v5/pruning/eager/hyp.scratch-low.yaml rename to examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/hyp.scratch-low.yaml diff --git a/examples/pytorch/object_detection/yolo_v5/pruning/eager/requirements.txt b/examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/requirements.txt similarity index 100% rename from examples/pytorch/object_detection/yolo_v5/pruning/eager/requirements.txt rename to examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/requirements.txt diff --git a/examples/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s6_dense_finetune.sh b/examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s6_dense_finetune.sh similarity index 100% rename from examples/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s6_dense_finetune.sh rename to examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s6_dense_finetune.sh diff --git a/examples/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s_coco_1x1.sh b/examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s_coco_1x1.sh similarity index 100% rename from examples/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s_coco_1x1.sh rename to examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s_coco_1x1.sh diff --git a/examples/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s_coco_4x1.sh b/examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s_coco_4x1.sh similarity index 100% rename from examples/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s_coco_4x1.sh rename to examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/scripts/yolov5s_coco_4x1.sh diff --git a/examples/pytorch/object_detection/yolo_v5/pruning/eager/train.py b/examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/train.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v5/pruning/eager/train.py rename to examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager/train.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/README.md b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/README.md similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/README.md rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/README.md diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/conf.yaml 
b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/conf.yaml similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/conf.yaml rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/conf.yaml diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/data_loader_terabyte.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/data_loader_terabyte.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/data_loader_terabyte.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/data_loader_terabyte.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/data_utils.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/data_utils.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/data_utils.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/data_utils.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_data_pytorch.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_data_pytorch.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_data_pytorch.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_data_pytorch.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch_tune.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch_tune.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch_tune.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/dlrm_s_pytorch_tune.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/requirements.txt b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/requirements.txt similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/requirements.txt rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/requirements.txt diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/run_and_time.sh b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/run_and_time.sh similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/run_and_time.sh rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/run_and_time.sh diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/run_benchmark.sh b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/run_benchmark.sh rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/run_benchmark.sh diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/run_quant.sh b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/run_quant.sh similarity index 100% rename from 
examples/pytorch/recommendation/dlrm/quantization/ptq/fx/run_quant.sh rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/run_quant.sh diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/tricks/md_embedding_bag.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/tricks/md_embedding_bag.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/tricks/md_embedding_bag.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/tricks/md_embedding_bag.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/fx/tricks/qr_embedding_bag.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/tricks/qr_embedding_bag.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/fx/tricks/qr_embedding_bag.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/fx/tricks/qr_embedding_bag.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CODE_OF_CONDUCT.md b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/CODE_OF_CONDUCT.md similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CODE_OF_CONDUCT.md rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/CODE_OF_CONDUCT.md diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CONTRIBUTING.md b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/CONTRIBUTING.md similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/CONTRIBUTING.md rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/CONTRIBUTING.md diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/LICENSE b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/LICENSE similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/LICENSE rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/LICENSE diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/README.md b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/README.md similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/README.md rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/README.md diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/conf_ipex.yaml b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/conf_ipex.yaml similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/conf_ipex.yaml rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/conf_ipex.yaml diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_loader_terabyte.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/data_loader_terabyte.py similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_loader_terabyte.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/data_loader_terabyte.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_utils.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/data_utils.py similarity index 100% rename from 
examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/data_utils.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/data_utils.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_data_pytorch.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_data_pytorch.py similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_data_pytorch.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_data_pytorch.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_s_pytorch.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_s_pytorch.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_s_pytorch.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_s_pytorch.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/extend_distributed.py b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/extend_distributed.py similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/extend_distributed.py rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/extend_distributed.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/requirements.txt b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/requirements.txt rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/requirements.txt diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/run_benchmark.sh b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/run_benchmark.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/run_benchmark.sh rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_quant.sh b/examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/run_quant.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_quant.sh rename to examples/deprecated/pytorch/recommendation/dlrm/quantization/ptq/ipex/run_quant.sh diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/README.md b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/README.md similarity index 97% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/README.md rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/README.md index 39a46923f15..dde21cd0ceb 100644 --- a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/README.md +++ b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/README.md @@ -1,53 +1,53 @@ -Step-by-Step -============ - -This document describes the step-by-step instructions for reproducing torchaudio pretrained model wav2vec2.0 and Hubert tuning results with Intel® Neural Compressor. - -## Prerequisite - -### 1. 
Environment -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -```shell -cd examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx -pip install -r requirements.txt -``` -> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). - -### 2. Prepare Dataset - -Download [LibriSpeech](https://www.openslr.org/resources/12/) Raw audio to dir: /path/to/speech_dataset. The dir include below folder: - -```bash -cd /path/to/speech_dataset -wget http://us.openslr.org/resources/12/test-clean.tar.gz -wget http://us.openslr.org/resources/12/test-other.tar.gz -tar -zxvf test-clean.tar.gz -C ./ -tar -zxvf test-other.tar.gz -C ./ - -ls /path/to/speech_dataset -LibriSpeech #folder_in_archive -cd LibriSpeech -BOOKS.TXT CHAPTERS.TXT dataset_infos.json LICENSE.TXT README.TXT SPEAKERS.TXT test-clean test-other -``` -# Run - -### 1. Wav2vec2.0 -```bash -bash run_quant.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=wav2vec2 --output_model=./saved_results -``` -```bash -bash run_benchmark.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=wav2vec2 --output_model=./saved_results --mode=performance -``` -### 2. Hubert - -```bash -bash run_quant.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=hubert --output_model=./saved_results -``` -```bash -bash run_benchmark.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=hubert --output_model=./saved_results --mode=performance -``` - - +Step-by-Step +============ + +This document describes the step-by-step instructions for reproducing torchaudio pretrained model wav2vec2.0 and Hubert tuning results with Intel® Neural Compressor. + +## Prerequisite + +### 1. Environment +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +```shell +cd examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx +pip install -r requirements.txt +``` +> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). + +### 2. Prepare Dataset + +Download [LibriSpeech](https://www.openslr.org/resources/12/) Raw audio to dir: /path/to/speech_dataset. The dir include below folder: + +```bash +cd /path/to/speech_dataset +wget http://us.openslr.org/resources/12/test-clean.tar.gz +wget http://us.openslr.org/resources/12/test-other.tar.gz +tar -zxvf test-clean.tar.gz -C ./ +tar -zxvf test-other.tar.gz -C ./ + +ls /path/to/speech_dataset +LibriSpeech #folder_in_archive +cd LibriSpeech +BOOKS.TXT CHAPTERS.TXT dataset_infos.json LICENSE.TXT README.TXT SPEAKERS.TXT test-clean test-other +``` +# Run + +### 1. Wav2vec2.0 +```bash +bash run_quant.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=wav2vec2 --output_model=./saved_results +``` +```bash +bash run_benchmark.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=wav2vec2 --output_model=./saved_results --mode=performance +``` +### 2. 
Hubert + +```bash +bash run_quant.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=hubert --output_model=./saved_results +``` +```bash +bash run_benchmark.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=hubert --output_model=./saved_results --mode=performance +``` + + diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/requirements.txt b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/requirements.txt similarity index 100% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/requirements.txt rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/requirements.txt diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_asr.py b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_asr.py similarity index 100% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_asr.py rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_asr.py diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_benchmark.sh b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_benchmark.sh rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_benchmark.sh diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_quant.sh b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_quant.sh similarity index 100% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_quant.sh rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx/run_quant.sh diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/README.md b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/README.md similarity index 96% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/README.md rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/README.md index 6821d5790f7..ba8dd1a9fce 100644 --- a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/README.md +++ b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/README.md @@ -1,46 +1,46 @@ -Step-by-Step -============ - -This document describes the step-by-step instructions for reproducing torchaudio pretrained model wav2vec2.0 and Hubert tuning results with Intel® Neural Compressor. - -## Prerequisite - -### 1. Environment -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -```shell -cd examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx -pip install -r requirements.txt -``` -> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). - -### 2. 
Prepare Dataset - -Download [LibriSpeech](https://www.openslr.org/resources/12/) Raw audio to dir: /path/to/speech_dataset. The dir include below folder: - -```bash -cd /path/to/speech_dataset -wget http://us.openslr.org/resources/12/test-clean.tar.gz -wget http://us.openslr.org/resources/12/test-other.tar.gz -tar -zxvf test-clean.tar.gz -C ./ -tar -zxvf test-other.tar.gz -C ./ - -ls /path/to/speech_dataset -LibriSpeech #folder_in_archive -cd LibriSpeech -BOOKS.TXT CHAPTERS.TXT dataset_infos.json LICENSE.TXT README.TXT SPEAKERS.TXT test-clean test-other -``` -# Run - -## Hubert - -```bash -bash run_quant.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=hubert --output_model=./saved_results -``` -```bash -bash run_benchmark.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=hubert --output_model=./saved_results --mode=performance -``` - - +Step-by-Step +============ + +This document describes the step-by-step instructions for reproducing torchaudio pretrained model wav2vec2.0 and Hubert tuning results with Intel® Neural Compressor. + +## Prerequisite + +### 1. Environment +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +```shell +cd examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx +pip install -r requirements.txt +``` +> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). + +### 2. Prepare Dataset + +Download [LibriSpeech](https://www.openslr.org/resources/12/) Raw audio to dir: /path/to/speech_dataset. The dir include below folder: + +```bash +cd /path/to/speech_dataset +wget http://us.openslr.org/resources/12/test-clean.tar.gz +wget http://us.openslr.org/resources/12/test-other.tar.gz +tar -zxvf test-clean.tar.gz -C ./ +tar -zxvf test-other.tar.gz -C ./ + +ls /path/to/speech_dataset +LibriSpeech #folder_in_archive +cd LibriSpeech +BOOKS.TXT CHAPTERS.TXT dataset_infos.json LICENSE.TXT README.TXT SPEAKERS.TXT test-clean test-other +``` +# Run + +## Hubert + +```bash +bash run_quant.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=hubert --output_model=./saved_results +``` +```bash +bash run_benchmark.sh --dataset_location=/path/to/speech_dataset/LibriSpeech/test-clean --input_model=hubert --output_model=./saved_results --mode=performance +``` + + diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/requirements.txt b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/requirements.txt similarity index 100% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/requirements.txt rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/requirements.txt diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_asr.py b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_asr.py similarity index 100% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_asr.py rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_asr.py diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_benchmark.sh b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_benchmark.sh similarity 
index 100% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_benchmark.sh rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_benchmark.sh diff --git a/examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_quant.sh b/examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_quant.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_quant.sh rename to examples/deprecated/pytorch/speech_recognition/torchaudio_models/quantization/ptq_static/fx/run_quant.sh diff --git a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/README.md b/examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/README.md similarity index 97% rename from examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/README.md rename to examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/README.md index ff166868444..0df64eac5f0 100644 --- a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/README.md +++ b/examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/README.md @@ -1,36 +1,36 @@ -Step-by-Step -============ - -This document describes the step-by-step instructions for reproducing [openai/whisper-large](https://huggingface.co/openai/whisper-large) from [transformers](https://github.com/huggingface/transformers.git) tuning results with Intel® Neural Compressor. - -## Prerequisite - -### 1. Environment -```shell -# Install Intel® Neural Compressor -pip install neural-compressor -``` -```shell -cd examples/pytorch/speech_recognition/whisper_large/quantization/ptq_static/fx -pip install -r requirements.txt -``` -> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). - -``` -# Run - -`--dataset_location` is used to get local saved huggingface/datasets cache. - -```bash -bash run_quant.sh --topology=whisper_large --output_dir=./saved_results -``` - -```bash -# fp32 -bash run_benchmark.sh --topology=whisper_large --output_model=./saved_results --mode=performance -bash run_benchmark.sh --topology=whisper_large --output_model=./saved_results --mode=benchmark -# int8 -bash run_benchmark.sh --topology=whisper_large --output_model=./saved_results --mode=performance --int8=true -bash run_benchmark.sh --topology=whisper_large --output_model=./saved_results --mode=benchmark --int8=true -``` - +Step-by-Step +============ + +This document describes the step-by-step instructions for reproducing [openai/whisper-large](https://huggingface.co/openai/whisper-large) from [transformers](https://github.com/huggingface/transformers.git) tuning results with Intel® Neural Compressor. + +## Prerequisite + +### 1. Environment +```shell +# Install Intel® Neural Compressor +pip install neural-compressor +``` +```shell +cd examples/pytorch/speech_recognition/whisper_large/quantization/ptq_static/fx +pip install -r requirements.txt +``` +> Note: Validated PyTorch [Version](/docs/source/installation_guide.md#validated-software-environment). + +``` +# Run + +`--dataset_location` is used to get local saved huggingface/datasets cache. 
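For reference, `run_quant.sh` below drives Intel® Neural Compressor's 2.x dynamic post-training quantization flow. A minimal, hypothetical sketch (the model name, `eval_func` body, and save path are illustrative placeholders, not the exact contents of `run_whisper_large.py`):

```python
# Illustrative sketch only: dynamic post-training quantization with the INC 2.x API.
from transformers import WhisperForConditionalGeneration
from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig

model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")

def eval_func(q_model):
    # Placeholder: evaluate the (quantized) model on the locally cached dataset.
    return 1.0

conf = PostTrainingQuantConfig(approach="dynamic")
q_model = quantization.fit(model, conf=conf, eval_func=eval_func)
q_model.save("./saved_results")
```

Dynamic quantization needs no calibration dataloader, which is why only an evaluation function is wired in here; the shell commands below handle the same steps end to end.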
+ +```bash +bash run_quant.sh --topology=whisper_large --output_dir=./saved_results +``` + +```bash +# fp32 +bash run_benchmark.sh --topology=whisper_large --output_model=./saved_results --mode=performance +bash run_benchmark.sh --topology=whisper_large --output_model=./saved_results --mode=benchmark +# int8 +bash run_benchmark.sh --topology=whisper_large --output_model=./saved_results --mode=performance --int8=true +bash run_benchmark.sh --topology=whisper_large --output_model=./saved_results --mode=benchmark --int8=true +``` + diff --git a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/requirements.txt b/examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/requirements.txt similarity index 100% rename from examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/requirements.txt rename to examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/requirements.txt diff --git a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_benchmark.sh b/examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_benchmark.sh similarity index 100% rename from examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_benchmark.sh rename to examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_benchmark.sh diff --git a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_quant.sh b/examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_quant.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_quant.sh rename to examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_quant.sh diff --git a/examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_whisper_large.py b/examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_whisper_large.py old mode 100755 new mode 100644 similarity index 100% rename from examples/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_whisper_large.py rename to examples/deprecated/pytorch/speech_recognition/whisper_large/quantization/ptq_dynamic/fx/run_whisper_large.py diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/README.md b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/README.md similarity index 75% rename from examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/README.md rename to examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/README.md index 057b3559756..aed9c1c2f34 100644 --- a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step +Step-by-Step (Deprecated) ============ This document is used to list steps of reproducing TensorFlow Object Detection models tuning results. This example can run on Intel CPUs and GPUs. @@ -61,6 +61,18 @@ unzip ppi.zip # Run +## Quantization Config + +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. 
+ +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` + ## 1. Quantization ```shell @@ -87,18 +99,23 @@ For graphsage, we applied the latter one because our philosophy is to enable the After prepare step is done, we just need update main.py like below. ```python if args.tune: - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - from neural_compressor.tensorflow.utils import BaseDataLoader - + from neural_compressor import quantization + from neural_compressor.data import DataLoader + from neural_compressor.config import PostTrainingQuantConfig dataset = CustomDataset() - calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=1, collate_fn=collate_function) - quant_config = StaticQuantConfig() - q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + calib_dataloader=DataLoader(framework='tensorflow', dataset=dataset, \ + batch_size=1, collate_fn = collate_function) + conf = PostTrainingQuantConfig() + q_model = quantization.fit(args.input_graph, conf=conf, \ + calib_dataloader=calib_dataloader, eval_func=evaluate) q_model.save(args.output_graph) if args.benchmark: if args.mode == 'performance': - evaluate(args.input_graph) + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig() + fit(args.input_graph, conf, b_func=evaluate) elif args.mode == 'accuracy': acc_result = evaluate(args.input_graph) print("Batch size = %d" % args.batch_size) diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py similarity index 98% rename from examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py rename to examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py index e2a1d28d7d7..3e237dd972e 100644 --- a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py +++ b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2024 Intel Corporation +# Copyright (c) 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/main.py b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/main.py similarity index 82% rename from examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/main.py rename to examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/main.py index 87837510d3d..8d80b582352 100644 --- a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/main.py +++ b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2024 Intel Corporation +# Copyright (c) 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
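For readability, the API migration applied by the remaining hunks below boils down to the following consolidated sketch of the deprecated 2.x-style flow. This is a sketch only: `args`, `CustomDataset`, `collate_function`, and `evaluate` are the example's own helpers and are assumed here rather than defined.

```python
# Consolidated sketch of the post-patch tune/benchmark path in main.py (2.x API).
from neural_compressor import quantization, set_random_seed
from neural_compressor.data import DataLoader
from neural_compressor.config import PostTrainingQuantConfig, BenchmarkConfig
from neural_compressor.benchmark import fit as benchmark_fit

set_random_seed(9527)
if args.tune:
    # Calibration data comes from the example's own dataset/collate helpers.
    calib_dataloader = DataLoader(framework='tensorflow', dataset=CustomDataset(),
                                  batch_size=1, collate_fn=collate_function)
    conf = PostTrainingQuantConfig()  # CPU defaults; see the itex/GPU variant above
    q_model = quantization.fit(args.input_graph, conf=conf,
                               calib_dataloader=calib_dataloader, eval_func=evaluate)
    q_model.save(args.output_graph)
if args.benchmark and args.mode == 'performance':
    benchmark_fit(args.input_graph, BenchmarkConfig(), b_func=evaluate)
```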
@@ -58,7 +58,7 @@ def prepare_Dataset(): data_location = args.dataset_location pretrained_model = args.input_graph - data = dataloader.load_data(prefix=data_location+'/ppi') + data = dataloader.load_data(prefix=data_location + '/ppi') G = data[0] features = data[1] id_map = data[2] @@ -70,13 +70,13 @@ def prepare_Dataset(): context_pairs = data[3] placeholders = utils.construct_placeholders(num_classes) - minibatch = utils.NodeMinibatchIterator(G, - id_map, - placeholders, - class_map, - num_classes, - batch_size=args.batch_size, - context_pairs = context_pairs) + minibatch = utils.NodeMinibatchIterator(G, + id_map, + placeholders, + class_map, + num_classes, + batch_size=args.batch_size, + context_pairs = context_pairs) return minibatch class CustomDataset(object): @@ -110,7 +110,7 @@ def evaluate(model): Returns: accuracy (float): evaluation result, the larger is better. """ - from neural_compressor.tensorflow import Model + from neural_compressor.model import Model model = Model(model) output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ model.output_tensor[0] @@ -163,22 +163,27 @@ class eval_graphsage_optimized_graph: def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" - from neural_compressor.common import set_random_seed + from neural_compressor import set_random_seed set_random_seed(9527) if args.tune: - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - from neural_compressor.tensorflow.utils import BaseDataLoader - + from neural_compressor import quantization + from neural_compressor.data import DataLoader + from neural_compressor.config import PostTrainingQuantConfig dataset = CustomDataset() - calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=1, collate_fn=collate_function) - quant_config = StaticQuantConfig() - q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) + calib_dataloader=DataLoader(framework='tensorflow', dataset=dataset, \ + batch_size=1, collate_fn = collate_function) + conf = PostTrainingQuantConfig() + q_model = quantization.fit(args.input_graph, conf=conf, \ + calib_dataloader=calib_dataloader, eval_func=evaluate) q_model.save(args.output_graph) if args.benchmark: if args.mode == 'performance': - evaluate(args.input_graph) + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig() + fit(args.input_graph, conf, b_func=evaluate) elif args.mode == 'accuracy': acc_result = evaluate(args.input_graph) print("Batch size = %d" % args.batch_size) diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_quant.sh 
b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py similarity index 99% rename from examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py rename to examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py index babe7146f5c..7ff4311b80d 100644 --- a/examples/3.x_api/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py +++ b/examples/deprecated/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (c) 2024 Intel Corporation +# Copyright (c) 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/README.md similarity index 95% rename from examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/README.md index 59ad4d5de08..b63c2a736dc 100644 --- 
+++ b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/README.md
@@ -46,7 +46,7 @@ image recognition
 ## 3. Prepare Dataset
 TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
-We also prepared related scripts in [TF image_recognition example](/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset).
+We also prepared related scripts in [TF image_recognition example](/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset).

 # Run Command
diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py
similarity index 100%
rename from examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py
rename to examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/main.py
diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/run_benchmark.sh
similarity index 100%
rename from examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh
rename to examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/run_benchmark.sh
diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/run_quant.sh
similarity index 100%
rename from examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/run_quant.sh
rename to examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/run_quant.sh
diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md
similarity index 95%
rename from examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md
rename to examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md
index a3e1278645c..20adba9545e 100644
--- a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md
+++ b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/README.md
@@ -46,7 +46,7 @@ image recognition
 ## 3. Prepare Dataset
 TensorFlow [models](https://github.com/tensorflow/models) repo provides [scripts and instructions](https://github.com/tensorflow/models/tree/master/research/slim#an-automated-script-for-processing-imagenet-data) to download, process and convert the ImageNet dataset to the TF records format.
-We also prepared related scripts in [TF image_recognition example](/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset).
+We also prepared related scripts in [TF image_recognition example](/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq#3-prepare-dataset).

 # Run Command
diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py
similarity index 100%
rename from examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py
rename to examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/main.py
diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/run_benchmark.sh
similarity index 100%
rename from examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh
rename to examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/run_benchmark.sh
diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/run_quant.sh
similarity index 100%
rename from examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/run_quant.sh
rename to examples/deprecated/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/run_quant.sh
diff --git a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/README.md b/examples/deprecated/tensorflow/image_recognition/ViT/pruning/magnitude/README.md
similarity index 100%
rename from examples/tensorflow/image_recognition/ViT/pruning/magnitude/README.md
rename to examples/deprecated/tensorflow/image_recognition/ViT/pruning/magnitude/README.md
diff --git a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/main.py b/examples/deprecated/tensorflow/image_recognition/ViT/pruning/magnitude/main.py
similarity index 100%
rename from examples/tensorflow/image_recognition/ViT/pruning/magnitude/main.py
rename to examples/deprecated/tensorflow/image_recognition/ViT/pruning/magnitude/main.py
diff --git a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/prepare_model.py b/examples/deprecated/tensorflow/image_recognition/ViT/pruning/magnitude/prepare_model.py
similarity index 100%
rename from examples/tensorflow/image_recognition/ViT/pruning/magnitude/prepare_model.py
rename to examples/deprecated/tensorflow/image_recognition/ViT/pruning/magnitude/prepare_model.py
diff --git a/examples/tensorflow/image_recognition/ViT/pruning/magnitude/requirements.txt b/examples/deprecated/tensorflow/image_recognition/ViT/pruning/magnitude/requirements.txt
similarity index 100%
rename from examples/tensorflow/image_recognition/ViT/pruning/magnitude/requirements.txt
rename to examples/deprecated/tensorflow/image_recognition/ViT/pruning/magnitude/requirements.txt
diff --git a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/README.md b/examples/deprecated/tensorflow/image_recognition/resnet_v2/pruning/magnitude/README.md
similarity index 100%
rename from examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/README.md
rename to examples/deprecated/tensorflow/image_recognition/resnet_v2/pruning/magnitude/README.md
diff --git a/examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/main.py
b/examples/deprecated/tensorflow/image_recognition/resnet_v2/pruning/magnitude/main.py similarity index 100% rename from examples/tensorflow/image_recognition/resnet_v2/pruning/magnitude/main.py rename to examples/deprecated/tensorflow/image_recognition/resnet_v2/pruning/magnitude/main.py diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md b/examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md similarity index 100% rename from examples/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md rename to examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/README.md diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py b/examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py similarity index 100% rename from examples/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py rename to examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/main.py diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/requirements.txt b/examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/resnet_v2/quantization/qat/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/requirements.txt diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/resnet_v2/quantization/qat/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/resnet_v2/quantization/qat/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/resnet_v2/quantization/qat/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/resnet_v2/quantization/qat/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/requirements.txt rename to 
examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/SavedModel/efficientnet_v2_b0/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/README.md 
b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/SavedModel/mobilenet_v1/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/distillation/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/distillation/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/distillation/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/distillation/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/distillation/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/distillation/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/distillation/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/distillation/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/distillation/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/distillation/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/distillation/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/distillation/requirements.txt diff --git 
a/examples/tensorflow/image_recognition/tensorflow_models/distillation/run_distillation.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/distillation/run_distillation.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/distillation/run_distillation.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/distillation/run_distillation.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/SavedModel/mobilenet_v2/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/imagenet_prepare/build_imagenet_data.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/build_imagenet_data.py similarity index 100% rename from examples/keras/image_recognition/imagenet_prepare/build_imagenet_data.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/build_imagenet_data.py diff --git a/examples/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/download_and_convert_imagenet.sh similarity index 100% rename from 
examples/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/download_and_convert_imagenet.sh diff --git a/examples/keras/image_recognition/imagenet_prepare/download_imagenet.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/download_imagenet.sh similarity index 100% rename from examples/keras/image_recognition/imagenet_prepare/download_imagenet.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/download_imagenet.sh diff --git a/examples/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt similarity index 100% rename from examples/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt diff --git a/examples/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/imagenet_metadata.txt similarity index 100% rename from examples/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/imagenet_metadata.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/run_benchmark.sh diff --git 
a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py similarity index 100% rename from 
examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet169/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet121/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/run_benchmark.sh rename to 
examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/densenet161/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/efficientnet-b0/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/README.md diff --git 
a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_resnet_v2/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/run_benchmark.sh 
b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v1/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v2/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/prepare_dataset.sh similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/prepare_dataset.sh rename to 
examples/deprecated/tensorflow/image_recognition/tensorflow_models/prepare_dataset.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/prepare_model.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/prepare_model.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/prepare_model.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/prepare_model.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v3/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/main.py similarity index 100% rename from 
examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/run_mixed.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/run_mixed.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/run_mixed.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/run_mixed.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/run_benchmark.sh diff --git 
a/examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/inception_v4/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_export.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_export.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_export.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/export/run_export.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py rename to 
examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v1/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/run_benchmark.sh diff --git 
a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v2/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/mixed_precision/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/mobilenet_v3/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py similarity 
index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet101/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/run_benchmark.sh diff --git 
a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet50_v1_5/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/README.md b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/README.md rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/README.md diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/main.py b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/main.py similarity index 100% rename from 
examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/main.py rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/main.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_101/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md rename to examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/README.md diff --git a/examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/__init__.py b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/__init__.py similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/__init__.py rename to examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/__init__.py diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/modeling.py b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/modeling.py similarity index 100% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/modeling.py rename to examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/modeling.py diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/optimization.py b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/optimization.py similarity index 100% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/optimization.py rename to examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/optimization.py diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_dataset.py b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_dataset.py similarity index 100% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_dataset.py rename to 
examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_dataset.py diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_model.sh b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_model.sh similarity index 100% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_model.sh rename to examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/prepare_model.sh diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py similarity index 99% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py rename to examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py index 3e2e430407e..88b0d12aa34 100644 --- a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py +++ b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_classifier.py @@ -886,7 +886,7 @@ def main(_): } tokenization.validate_case_matches_checkpoint(FLAGS.do_lower_case, - FLAGS.init_checkpoint) + FLAGS.init_checkpoint) bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file) diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/tokenization.py b/examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/tokenization.py similarity index 100% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/tokenization.py rename to examples/deprecated/tensorflow/nlp/bert_base_mrpc/quantization/ptq/tokenization.py diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/README.md b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/README.md rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/README.md diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_pretraining_data.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/create_pretraining_data.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_pretraining_data.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/create_pretraining_data.py diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_tf_record.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/create_tf_record.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/create_tf_record.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/create_tf_record.py diff --git 
a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/evaluate_squad.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/evaluate_squad.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/evaluate_squad.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/evaluate_squad.py diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/export_classifier.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/export_classifier.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/export_classifier.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/export_classifier.py diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/freeze_estimator_to_pb.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/freeze_estimator_to_pb.py similarity index 99% rename from examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/freeze_estimator_to_pb.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/freeze_estimator_to_pb.py index b278c37581d..a9d64a5765d 100644 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/freeze_estimator_to_pb.py +++ b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/freeze_estimator_to_pb.py @@ -297,7 +297,7 @@ def main(_): run_config = tf.compat.v1.estimator.tpu.RunConfig( cluster=None, master=None, - model_dir='./', + model_dir='/', save_checkpoints_steps=1000, session_config=session_config, tpu_config=tf.compat.v1.estimator.tpu.TPUConfig( diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/generic_ops.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/generic_ops.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/generic_ops.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/generic_ops.py diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/modeling.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/modeling.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/modeling.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/modeling.py diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/optimization.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/optimization.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/optimization.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/optimization.py diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_dataset.sh similarity index 100% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_dataset.sh diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_model.sh b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_model.sh similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_model.sh rename to 
examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_model.sh diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tokenization.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/tokenization.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tokenization.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/tokenization.py diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tune_squad.py b/examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/tune_squad.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/tune_squad.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad/quantization/ptq/tune_squad.py diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md similarity index 82% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md index 41a673fc834..15d84c7f32e 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step +Step-by-Step (Deprecated) ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning result of Intel® Model Zoo bert large model on squad v1.1 task. @@ -74,7 +74,7 @@ bash prepare_dataset.sh --output_dir=./data ### Convert the dataset to TF Record format After the dataset is downloaded by either of ways above, the dataset should be converted to files of TF Record format. ```shell -python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=data/eval.tf_record +python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=./eval.tf_record ``` # Run Command @@ -82,11 +82,22 @@ python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v ## Quantization ```shell - bash run_quant.sh --input_model=./fp32_bert_squad.pb --output_model=./bert_squad_int8.pb --dataset_location=data + bash run_quant.sh --input_model=./fp32_bert_squad.pb --output_model=./bert_squad_int8.pb --dataset_location=/path/to/evaluation/dataset ``` +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. 
If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` + ## Benchmark ```shell - bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=accuracy --dataset_location=data --batch_size=64 - bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=performance --dataset_location=data --batch_size=64 + bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=64 + bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=64 ``` \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py similarity index 99% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py index 8adecb971fd..8fb20e36c59 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py +++ b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2024 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py similarity index 99% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py index 12c6486283d..d24d701d953 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py +++ b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2024 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -113,7 +113,7 @@ def __init__(self, self.is_impossible = is_impossible -def read_squad_examples(input_file, is_training=None): +def read_squad_examples(input_file, is_training): """Read a SQuAD json file into a list of SquadExample.""" with tf.io.gfile.GFile(input_file, "r") as reader: input_data = json.load(reader)["data"] @@ -196,8 +196,8 @@ def is_whitespace(c): def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, is_training=None, - output_fn=None): + doc_stride, max_query_length, is_training, + output_fn): """Loads a data file into a list of `InputBatch`s.""" unique_id = 1000000000 diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/evaluate_squad.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/evaluate_squad.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/evaluate_squad.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/evaluate_squad.py diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/export_classifier.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/export_classifier.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/export_classifier.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/export_classifier.py diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/freeze_estimator_to_pb.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/freeze_estimator_to_pb.py similarity index 99% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/freeze_estimator_to_pb.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/freeze_estimator_to_pb.py index b278c37581d..a9d64a5765d 100644 --- a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/freeze_estimator_to_pb.py +++ b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/freeze_estimator_to_pb.py @@ -297,7 +297,7 @@ def main(_): run_config = tf.compat.v1.estimator.tpu.RunConfig( cluster=None, master=None, - model_dir='./', + model_dir='/', save_checkpoints_steps=1000, session_config=session_config, tpu_config=tf.compat.v1.estimator.tpu.TPUConfig( diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/generic_ops.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/generic_ops.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/generic_ops.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/generic_ops.py diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/modeling.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/modeling.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/modeling.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/modeling.py diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/optimization.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/optimization.py similarity index 100% rename from 
examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/optimization.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/optimization.py diff --git a/examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_dataset.sh b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad/quantization/ptq/prepare_dataset.sh rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_dataset.sh diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_model.sh b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_model.sh similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_model.sh rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/prepare_model.sh diff --git a/examples/helloworld/tf_example4/requirements.txt b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/requirements.txt similarity index 100% rename from examples/helloworld/tf_example4/requirements.txt rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh similarity index 96% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh index aa8d269a79a..b367b93247b 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh +++ b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh @@ -38,7 +38,7 @@ function init_params { # run_tuning function run_benchmark { - python main.py \ + python tune_squad.py \ --input_model=${input_model} \ --mode=${mode} \ --dataset_location=${dataset_location} \ diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh similarity index 96% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh index ddc30b40177..bee4ecaf784 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh +++ b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh @@ -34,7 +34,7 @@ function init_params { # run_tuning function run_tuning { - python main.py \ + python tune_squad.py \ --input_model=${input_model} \ --output_model=${output_model} \ --dataset_location=${dataset_location} \ diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py similarity index 99% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py rename to 
examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py index 77c3175db07..dbf083617f5 100644 --- a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py +++ b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2024 Intel Corporation +# Copyright (c) 2022 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tune_squad.py b/examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tune_squad.py similarity index 100% rename from examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tune_squad.py rename to examples/deprecated/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tune_squad.py diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/README.md b/examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/nlp/distilbert_base/quantization/ptq/README.md rename to examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/README.md diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py b/examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py similarity index 100% rename from examples/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py rename to examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/download_dataset.py diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_inference.py b/examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/run_inference.py similarity index 100% rename from examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_inference.py rename to examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/run_inference.py diff --git a/examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/nlp/distilbert_base/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md similarity index 76% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md rename to 
examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md index 7a8c22631e0..24b558ed032 100644 --- a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md +++ b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md @@ -1,4 +1,4 @@ -Step-by-Step +Step-by-Step (Deprecated) ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor smooth quantization of language models gpt-j-6B. @@ -11,7 +11,7 @@ This document is used to list steps of reproducing TensorFlow Intel® Neural Com ```shell # Install Intel® Neural Compressor pip install neural-compressor -pip install -r requirements.txt +pip install -r requirements.txt ``` ## 2. Prepare Pretrained model @@ -91,6 +91,16 @@ class MyDataloader: return self.length ``` +### Quantization Config +The Quantization Config class has default parameter settings for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +```python +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` ### Code Update After prepare step is done, we add the code for quantization tuning to generate quantized model. @@ -114,24 +124,30 @@ To apply quantization, the function that maps names from AutoTrackable variables Please use the recipe to set smooth quantization. ```python - from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, autotune - from neural_compressor.tensorflow.quantization import TuningConfig - from neural_compressor.tensorflow.utils import BaseDataLoader - + from neural_compressor import quantization, PostTrainingQuantConfig calib_dataloader = MyDataloader(mydata, batch_size=run_args.batch_size) - quant_config = [SmoothQuantConfig(alpha=0.52705), StaticQuantConfig(act_dtype="int8", weight_dtype="int8")] - tune_config = TuningConfig(config_set=quant_config, max_trials=1) + recipes = {"smooth_quant": True, "smooth_quant_args": {'alpha': 0.52705}} + conf = PostTrainingQuantConfig(quant_level=1, + excluded_precisions=["bf16"],##use basic tuning + recipes=recipes, + calibration_sampling_size=[1], + ) + model.weight_name_mapping = weight_name_mapping - q_model = autotune(model, - tune_config, - eval_fn=evaluate, - calib_dataloader=calib_dataloader) + q_model = quantization.fit( model, + conf, + eval_func=evaluate, + calib_dataloader=calib_dataloader) + q_model.save(run_args.output_model) ``` #### Benchmark ```python if run_args.mode == "performance": - evaluate(model.model) + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=run_args.iteration, cores_per_instance=4, num_of_instance=1) + fit(model, conf, b_func=evaluate) elif run_args.mode == "accuracy": acc_result = evaluate(model.model) print("Batch size = %d" % run_args.batch_size) diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py similarity index 90% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py index faf54b65bd0..3de5c654d58 100644 --- a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py +++
b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2024 Intel Corporation +# Copyright (c) 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -276,10 +276,6 @@ def evaluate(model, tf_eval_dataset=mydata): iteration = run_args.iteration correct = 0 latency_list = [] - from neural_compressor.tensorflow.utils import BaseModel - - if isinstance(model, BaseModel): - model = model.model infer = model.signatures["serving_default"] for idx, data in enumerate(tf_eval_dataset): input_ids = tf.convert_to_tensor([data[:-1]], dtype=tf.int32) @@ -320,26 +316,35 @@ def main(): with train_args.strategy.scope(): options = tf.data.Options() options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF - from neural_compressor.tensorflow import Model + from neural_compressor import Model model = Model(run_args.input_model, modelType='llm_saved_model') if run_args.tune: - from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, autotune - from neural_compressor.tensorflow.quantization import TuningConfig - from neural_compressor.tensorflow.utils import BaseDataLoader + from neural_compressor.config import AccuracyCriterion + from neural_compressor import quantization, PostTrainingQuantConfig calib_dataloader = MyDataloader(mydata, batch_size=run_args.batch_size) - quant_config = [SmoothQuantConfig(alpha=0.52705), StaticQuantConfig(act_dtype="int8", weight_dtype="int8")] - tune_config = TuningConfig(config_set=quant_config, max_trials=1) + recipes = {"smooth_quant": True, "smooth_quant_args": {'alpha': 0.52705}} + conf = PostTrainingQuantConfig(quant_level=1, + excluded_precisions=["bf16"],##use basic tuning + recipes=recipes, + calibration_sampling_size=[1], + accuracy_criterion=AccuracyCriterion() + ) + model.weight_name_mapping = weight_name_mapping - q_model = autotune(model, - tune_config, - eval_fn=evaluate, - calib_dataloader=calib_dataloader) + q_model = quantization.fit( model, + conf, + eval_func=evaluate, + calib_dataloader=calib_dataloader) + q_model.save(run_args.output_model) if run_args.benchmark: if run_args.mode == "performance": - evaluate(model.model) + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + conf = BenchmarkConfig(warmup=10, iteration=run_args.iteration, cores_per_instance=4, num_of_instance=1) + fit(model, conf, b_func=evaluate) elif run_args.mode == "accuracy": acc_result = evaluate(model.model) print("Batch size = %d" % run_args.batch_size) diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py similarity index 95% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py index cb4cd7f3f29..12ac19cf4ba 100644 --- a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py +++ b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2024 Intel Corporation +# Copyright (c) 2023 Intel Corporation # # Licensed under the Apache License, 
Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.sh b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.sh similarity index 100% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.sh rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.sh diff --git a/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt new file mode 100644 index 00000000000..186124490e2 --- /dev/null +++ b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt @@ -0,0 +1,4 @@ +tensorflow==2.12.1 +transformers +datasets +numpy \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_benchmark.sh b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_benchmark.sh rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_quant.sh b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_quant.sh rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/run_quant.sh diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md similarity index 96% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md index fa45adbd5ef..386371f8eb5 100644 --- a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md +++ b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md @@ -1,4 +1,4 @@ -Step-by-Step +Step-by-Step (Deprecated) ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor quantization and smooth quantization of language models such as OPT and GPT2. 
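The gpt-j hunks above (and the smoothquant hunks that follow) move these examples from the 3.x TensorFlow API (`SmoothQuantConfig`/`autotune`) to the deprecated 2.x recipe-based flow. As a reading aid, here is a minimal consolidated sketch of that 2.x smooth-quant flow; `my_model`, `my_calib_dataloader`, and `my_eval_func` are placeholders for the objects the example builds itself (its `Model(...)`, `MyDataloader(...)`, and `evaluate(...)`), not names defined in this patch.

```python
# Minimal sketch of the deprecated 2.x smooth-quant flow shown in the gpt-j hunks.
# `my_model`, `my_calib_dataloader`, and `my_eval_func` are placeholders.
from neural_compressor import quantization, PostTrainingQuantConfig

recipes = {"smooth_quant": True, "smooth_quant_args": {"alpha": 0.52705}}
conf = PostTrainingQuantConfig(
    quant_level=1,                     # basic tuning
    excluded_precisions=["bf16"],
    recipes=recipes,
    calibration_sampling_size=[1],
)
q_model = quantization.fit(
    my_model,
    conf,
    eval_func=my_eval_func,            # callable(model) -> float accuracy
    calib_dataloader=my_calib_dataloader,
)
q_model.save("./gpt-j-6B_int8")        # output path is illustrative
```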
diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py similarity index 83% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py index eda0d222d39..acf3299689b 100644 --- a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py +++ b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py @@ -1,20 +1,3 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2024 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# import os.path import transformers import tensorflow as tf @@ -28,7 +11,7 @@ sys.path.insert(0, './') parser = argparse.ArgumentParser() -parser.add_argument('--optimized', action='store_true', help="eval fp32 model or optimized model") +parser.add_argument('--int8', action='store_true', help="eval fp32 model or int8 model") parser.add_argument('--model_name_or_path', type=str, default='facebook/opt-125m') parser.add_argument('--batch_size', type=int, default=16) parser.add_argument('--warmup', type=int, default=10) @@ -64,6 +47,7 @@ def get_attention_mask(self, input_ids): return tf.constant(1 - (input_ids==1).numpy().astype(int)) def evaluate_tf_v1(self, model): + # return 0.99 # TODO debug remove total, hit = 0, 0 index = 1 infer = model.signatures["serving_default"] @@ -173,18 +157,16 @@ def __len__(self): evaluator = Evaluator(eval_dataset, tokenizer, 'cpu') -if args.optimized: - print("benchmarking optimized model") - optimized_folder = model_name.split('/')[-1] + "_int8" - if not os.path.exists(optimized_folder): - print(f"could not find optimized folder {optimized_folder} ") +if args.int8: - print("benchmarking int8 model") + int8_folder = model_name.split('/')[-1] + "_int8" + if not os.path.exists(int8_folder): + print(f"could not find int8 folder {int8_folder} ") exit() - model = tf.saved_model.load(optimized_folder) # tensorflow.python.trackable.autotrackable.AutoTrackable object + model = tf.saved_model.load(int8_folder) # tensorflow.python.trackable.autotrackable.AutoTrackable object else: print("benchmaking fp32 model") - model = transformers.TFAutoModelForCausalLM.from_pretrained(model_name) - from neural_compressor.tensorflow import Model - - model = Model(model).model # tensorflow.python.trackable.autotrackable.AutoTrackable object + print("Benchmarking fp32 model is no longer supported in this example.
please go to 3.x_api examples or raise issue to us.") + exit(0) evaluator.evaluate_tf_v1(model) diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py similarity index 54% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py index 8f012ceb404..b88cd9f7a09 100644 --- a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py +++ b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2024 Intel Corporation +# Copyright (c) 2023 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,7 +28,10 @@ parser = argparse.ArgumentParser() parser.add_argument('--sq', action='store_true', default=False, help="whether to use smooth quant") +# parser.add_argument('--calib_num', type=int, default=100, help="calibration num for sq") parser.add_argument('--model_name_or_path', type=str, default="facebook/opt-125m") +# TODO auto tuning not supported currently for TF backend +# parser.add_argument('--alpha', default=0.5, help="Set alpha=auto to use alpha tuning.") parser.add_argument('--alpha', type=float, default=0.5, help="alpha value for smoothing.") parser.add_argument('--log_frequency', type=int, default=100) parser.add_argument('--batch_size', type=int, default=16) @@ -36,6 +39,58 @@ parser.add_argument('--fallback_add', action='store_true', default=False, help="Whether to add fp32 fallback option" ) args = parser.parse_args() +class Evaluator: + def __init__(self, dataset, tokenizer, device, batch_size=args.batch_size): + self.dataset = dataset + self.tokenizer = tokenizer + self.device = device + self.dataloader = CustomDataloader(dataset, tokenizer, batch_size, device) + + def evaluate(self, model): + # model.eval() + # The task is to predict the last word of the input. 
+ total, hit = 0, 0 + index = 1 + for input_ids, label, label_indices in tqdm(self.dataloader): + # TFCausalLMOutputWithPast len: 2 + # first element shape (16, 196, 50272) + # second element shape (16, 12, 196, 64) + outputs = model(input_ids) + last_token_logits = outputs[0].numpy()[np.arange(len(label_indices)), label_indices, :] + pred = last_token_logits.argmax(axis=-1) + total += label.shape[0] + hit += (pred == label.numpy()).sum().item() + if index % args.log_frequency == 0: + print(hit / total, flush=True) + index += 1 + acc = hit / total + print(acc, flush=True) + return acc + + def get_attention_mask(self, input_ids): + return tf.constant(1 - (input_ids==1).numpy().astype(int)) + + def evaluate_tf_v1(self, model): + # return 0.99 # TODO debug remove + total, hit = 0, 0 + index = 1 + infer = model.signatures["serving_default"] + for input_ids, label, label_indices in tqdm(self.dataloader): + attention_mask = self.get_attention_mask(input_ids) + input_ids = tf.constant(input_ids.numpy(), dtype=infer.inputs[0].dtype) + attention_mask = tf.constant(attention_mask.numpy(), dtype=infer.inputs[0].dtype) + results = infer(input_ids=input_ids, attention_mask=attention_mask) # len: 25 Identity: [16, 196, 50272], Identity_1: [16, 12, 196, 64] + last_token_logits = results['Identity'].numpy()[np.arange(len(label_indices)), label_indices, :] + pred = last_token_logits.argmax(axis=-1) + total += label.shape[0] + hit += (pred == label.numpy()).sum().item() + if index % args.log_frequency == 0: + print(hit / total, flush=True) + index += 1 + acc = hit / total + print(acc, flush=True) + return acc + class CustomDataloader: # for_calib=True in quantization, only input_id is needed, =False in evaluation need label def __init__(self, dataset, tokenizer, batch_size=1, device='cpu', for_calib=False): @@ -72,9 +127,13 @@ def pad_input(self, input): # input: a record def __iter__(self): if self.for_calib: labels = None + # label_indices = None for idx, record in enumerate(self.dataset): input_id, label, label_index = self.pad_input(record) attention_mask = self.get_attention_mask(input_id) + # compose attention_mask and input_id together + # during the calibration, it requires to yield a + # cur_input = tf.constant(np.append(attention_mask, input_id.numpy(), axis=0)) cur_input = {"input_ids": input_id.numpy(), "attention_mask": attention_mask} assert self.batch_size == 1 yield (cur_input, label) @@ -110,31 +169,42 @@ def __len__(self): tokenizer = transformers.AutoTokenizer.from_pretrained(model_name) model = transformers.TFAutoModelForCausalLM.from_pretrained(model_name) +eval_dataset = load_dataset('lambada', split='validation') + +# model.eval() -calib_dataset = load_dataset('lambada', split='validation') +evaluator = Evaluator(eval_dataset, tokenizer, 'cpu') + +calib_dataset = load_dataset('lambada', split='train') +# calib_dataset = eval_dataset # TODO for debug calib_dataset = calib_dataset.shuffle(seed=42) calib_dataloader = CustomDataloader(calib_dataset, tokenizer, device='cpu', batch_size=1, for_calib=True) -from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, quantize_model +def eval_func(model): + acc = evaluator.evaluate_tf_v1(model) + return acc -ptq_config = None -quant_config = [] +from neural_compressor import PostTrainingQuantConfig +from neural_compressor.config import AccuracyCriterion +from neural_compressor import quantization +recipes = {} if args.sq: - quant_config.append(SmoothQuantConfig(alpha=args.alpha)) + recipes = {"smooth_quant": True, 
"smooth_quant_args": {'alpha': args.alpha}} +op_type_dict = {} if args.kl: - ptq_config = StaticQuantConfig(act_dtype="int8", weight_dtype="int8", act_algorithm="kl") + op_type_dict = {'linear': {'activation': {'algorithm': ['kl']}}} if args.fallback_add: - ptq_config = StaticQuantConfig(act_dtype="int8", weight_dtype="int8") - ptq_config.set_local("Add", StaticQuantConfig(act_dtype="fp32", weight_dtype="fp32")) - -if not ptq_config: - ptq_config = StaticQuantConfig(act_dtype="int8", weight_dtype="int8") -quant_config.append(ptq_config) - -q_model = quantize_model(model, - quant_config, - calib_dataloader=calib_dataloader) + op_type_dict["add"] = {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}} +conf = PostTrainingQuantConfig(quant_level=1, excluded_precisions=["bf16"],##use basic tuning + recipes=recipes, + op_type_dict=op_type_dict, accuracy_criterion=AccuracyCriterion( +tolerable_loss=0.011, # TODO remove for debug +)) +q_model = quantization.fit(model, + conf, + calib_dataloader=calib_dataloader, + eval_func=eval_func) save_model_name = model_name.split("/")[-1] q_model.save(f"{save_model_name}_int8") diff --git a/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt new file mode 100644 index 00000000000..fb574f91ff5 --- /dev/null +++ b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt @@ -0,0 +1,3 @@ +tensorflow +datasets +transformers \ No newline at end of file diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh similarity index 81% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh index fed1991e4ca..b8fad17eebd 100644 --- a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh +++ b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh @@ -10,7 +10,7 @@ function main { # init params function init_params { - optimized=false + int8=false batch_size=16 for var in "$@" do @@ -18,8 +18,8 @@ function init_params { --input_model=*) input_model=$(echo $var |cut -f2 -d=) ;; - --optimized=*) - optimized=$(echo $var |cut -f2 -d=) + --int8=*) + int8=$(echo $var |cut -f2 -d=) ;; --batch_size=*) batch_size=$(echo $var |cut -f2 -d=) @@ -31,11 +31,11 @@ function init_params { # run_tuning function run_benchmark { - if [[ "${optimized}" == "true" ]]; then + if [[ "${int8}" == "true" ]]; then python benchmark.py \ --model_name_or_path ${input_model} \ --batch_size ${batch_size} \ - --optimized + --int8 else python benchmark.py \ --model_name_or_path ${input_model} \ diff --git a/examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_quant.sh b/examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_quant.sh rename to examples/deprecated/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_quant.sh diff --git 
a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/README.md similarity index 58% rename from examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md rename to examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/README.md index 544e954371e..fb672168204 100644 --- a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step +Step-by-Step (Deprecated) ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of Transformer-LT. This example can run on Intel CPUs and GPUs. @@ -50,30 +50,30 @@ tar -zxvf transformer_lt_official_fp32_pretrained_model.tar.gz Dataset is in data folder, pretrained model is in graph folder. #### Automatic dataset & model download -Run the `prepare_dataset_model.sh` script located in `examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq`. +Run the `prepare_dataset_model.sh` script located in `examples/tensorflow/nlp/transformer_lt/quantization/ptq`. ```shell -cd examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq +cd examples/tensorflow/nlp/transformer_lt/quantization/ptq bash prepare_dataset_model.sh ``` ## Run Command -### Quantization ```shell -bash run_quant.sh --input_model=./model/fp32_graphdef.pb --dataset_location=./data --output_model=./model/int8_graphdef.pb -``` -### Benchmark -```shell -bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=performance - -bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=accuracy --batch_size=1 +python main.py --input_graph=/path/to/fp32_graphdef.pb --inputs_file=/path/to/newstest2014.en --reference_file=/path/to/newstest2014.de --vocab_file=/path/to/vocab.txt --tune ``` Details of enabling Intel® Neural Compressor on transformer-lt for Tensorflow. ========================= This is a tutorial of how to enable transformer-lt model with Intel® Neural Compressor. +## User Code Analysis +1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file. + +2. User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself. + +For transformer-lt, we applied the latter one because we don't have dataset and metric for transformer-lt. The task is to implement the *q_dataloader* and *eval_func*. + ### q_dataloader Part Adaption Below dataset class uses getitem to provide the model with input. @@ -96,6 +96,19 @@ class Dataset(object): ### Evaluation Part Adaption We evaluate the model with BLEU score, its source: https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + inputs=['input_tensor'], + outputs=['model/Transformer/strided_slice_19'], + ... + ) +``` + Here we set the input tensor and output tensors name into *inputs* and *outputs* args. 
In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object. @@ -104,16 +117,17 @@ After prepare step is done, we add tune code to generate quantized model. #### Tune ```python - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model - - dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) - calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn) - - quant_config = StaticQuantConfig() - model = Model(graph) - model.input_tensor_names = ['input_tensor'] - model.output_tensor_names = ['model/Transformer/strided_slice_19'] - q_model = quantize_model(model, quant_config, calib_dataloader) + from neural_compressor import quantization + from neural_compressor.data import DataLoader + from neural_compressor.config import PostTrainingQuantConfig + ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + calib_dataloader = DataLoader(dataset=ds, collate_fn=collate_fn, \ + batch_size=FLAGS.batch_size, framework='tensorflow') + conf = PostTrainingQuantConfig(inputs=['input_tensor'], + outputs=['model/Transformer/strided_slice_19'], + calibration_sampling_size=[500]) + q_model = quantization.fit(graph, conf=conf, calib_dataloader=calib_dataloader, + eval_func=eval_func) try: q_model.save(FLAGS.output_model) except Exception as e: @@ -121,10 +135,11 @@ After prepare step is done, we add tune code to generate quantized model. ``` #### Benchmark ```python - if FLAGS.benchmark: - assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ - "Benchmark only supports performance or accuracy mode." - acc = eval_func(graph) - if FLAGS.mode == 'accuracy': - print('Accuracy is {:.3f}'.format(acc)) + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + fit(graph, conf=BenchmarkConfig(), b_func=eval_func) + elif FLAGS.mode == 'accuracy': + eval_func(graph) ``` +The Intel® Neural Compressor quantization.fit() function returns the best quantized model found within the given time constraint.
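For quick reference, here is a minimal sketch (not part of the patch) of the 2.x-style tune-then-benchmark flow the restored README above describes, wrapped as a helper function. `graph`, `ds`, `collate_fn`, and `eval_func` are placeholders standing in for the objects defined in the example's main.py; the tensor names and sampling size are the ones shown in the snippet above.

```python
# Sketch only: consolidates the tune + benchmark calls described in the README above.
# The caller supplies `graph`, `ds`, `collate_fn`, and `eval_func` from the example's main.py.
from neural_compressor import quantization
from neural_compressor.benchmark import fit as benchmark_fit
from neural_compressor.config import BenchmarkConfig, PostTrainingQuantConfig
from neural_compressor.data import DataLoader


def tune_and_benchmark(graph, ds, collate_fn, eval_func, output_model, batch_size=64):
    calib_dataloader = DataLoader(framework='tensorflow', dataset=ds,
                                  batch_size=batch_size, collate_fn=collate_fn)
    conf = PostTrainingQuantConfig(inputs=['input_tensor'],
                                   outputs=['model/Transformer/strided_slice_19'],
                                   calibration_sampling_size=[500])
    # quantization.fit tunes and returns the best int8 model found under the constraints.
    q_model = quantization.fit(graph, conf=conf,
                               calib_dataloader=calib_dataloader,
                               eval_func=eval_func)
    q_model.save(output_model)
    # Performance mode goes through neural_compressor.benchmark.fit with an eval function.
    benchmark_fit(graph, conf=BenchmarkConfig(), b_func=eval_func)
    return q_model
```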
diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/main.py similarity index 86% rename from examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py rename to examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/main.py index 58a93090e7a..6d8b92d1317 100644 --- a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/main.py +++ b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/main.py @@ -30,7 +30,7 @@ from utils import metrics from utils import tokenizer from utils.tokenizer import Subtokenizer -from neural_compressor.tensorflow.utils import BaseDataLoader +from neural_compressor.data import DataLoader flags = tf.compat.v1.flags FLAGS = flags.FLAGS @@ -143,7 +143,9 @@ def eval_func(infer_graph, iteration=-1): 'model/Transformer/strided_slice_19:0') ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) - dataloader = BaseDataLoader(dataset=ds, batch_size=FLAGS.batch_size, collate_fn=collate_fn) + dataloader = DataLoader(framework='tensorflow', dataset=ds, + batch_size=FLAGS.batch_size, collate_fn=collate_fn) + config = tf.compat.v1.ConfigProto() config.use_per_session_threads = 1 config.inter_op_parallelism_threads = 1 @@ -187,6 +189,7 @@ def eval_func(infer_graph, iteration=-1): except: decode.append(subtokenizer.decode(otr)) bleu_eval.update(decode, labels) + print('Accuracy is {:.3f}'.format(bleu_eval.result())) return bleu_eval.result() class Dataset(object): @@ -232,16 +235,16 @@ def __len__(self): def main(_): graph = load_graph(FLAGS.input_graph) if FLAGS.tune: - from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model - - dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) - calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn) - - quant_config = StaticQuantConfig() - model = Model(graph) - model.input_tensor_names = ['input_tensor'] - model.output_tensor_names = ['model/Transformer/strided_slice_19'] - q_model = quantize_model(model, quant_config, calib_dataloader) + from neural_compressor import quantization + from neural_compressor.config import PostTrainingQuantConfig + ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + calib_dataloader = DataLoader(framework='tensorflow', dataset=ds, \ + batch_size=FLAGS.batch_size, collate_fn=collate_fn,) + conf = PostTrainingQuantConfig(inputs=['input_tensor'], + outputs=['model/Transformer/strided_slice_19'], + calibration_sampling_size=[500]) + q_model = quantization.fit(graph, conf=conf, calib_dataloader=calib_dataloader, + eval_func=eval_func) try: q_model.save(FLAGS.output_model) except Exception as e: @@ -250,9 +253,13 @@ def main(_): if FLAGS.benchmark: assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ "Benchmark only supports performance or accuracy mode." 
- acc = eval_func(graph) - if FLAGS.mode == 'accuracy': - print('Accuracy is {:.3f}'.format(acc)) + from neural_compressor.benchmark import fit + from neural_compressor.config import BenchmarkConfig + if FLAGS.mode == 'performance': + conf = BenchmarkConfig(cores_per_instance=28, num_of_instance=1) + fit(graph, conf, b_func=eval_func) + elif FLAGS.mode == 'accuracy': + eval_func(graph) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh similarity index 100% rename from examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh rename to examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/prepare_dataset_model.sh diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py rename to examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py similarity index 100% rename from examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py rename to examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/utils/metrics.py diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py similarity index 100% rename from examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py rename to examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer.py diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py b/examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py similarity index 100% rename from examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py rename to examples/deprecated/tensorflow/nlp/transformer_lt/quantization/ptq/utils/tokenizer_test.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/README.md b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/README.md rename to 
examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/README.md diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/__init__.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/__init__.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/__init__.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/__init__.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_gnmt_tags.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_gnmt_tags.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_gnmt_tags.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_gnmt_tags.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_maskrcnn_tags.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_maskrcnn_tags.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_maskrcnn_tags.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_maskrcnn_tags.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_ncf_tags.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_ncf_tags.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_ncf_tags.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_ncf_tags.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_resnet_tags.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_resnet_tags.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_resnet_tags.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_resnet_tags.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_ssd_tags.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_ssd_tags.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_ssd_tags.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_ssd_tags.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_transformer_tags.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_transformer_tags.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_transformer_tags.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/_transformer_tags.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/mlperf_log.py 
b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/mlperf_log.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/mlperf_log.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/mlperf_log.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/resnet_log_helper.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/resnet_log_helper.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/resnet_log_helper.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/resnet_log_helper.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/tags.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/tags.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/tags.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/tags.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/test_tag_set.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/test_tag_set.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/test_tag_set.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/test_tag_set.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/tf_mlperf_log.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/tf_mlperf_log.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/tf_mlperf_log.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/mlperf_compliance/tf_mlperf_log.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_inference.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_inference.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_inference.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_inference.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_quant.sh 
b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/__init__.py similarity index 100% rename from examples/3.x_api/tensorflow/nlp/transformer_lt/quantization/ptq/utils/__init__.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/__init__.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/dataset.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/dataset.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/dataset.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/dataset.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/metrics.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/metrics.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/metrics.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/metrics.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/tokenizer.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/tokenizer.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/tokenizer.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/tokenizer.py diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/tokenizer_test.py b/examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/tokenizer_test.py similarity index 100% rename from examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/tokenizer_test.py rename to examples/deprecated/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/tokenizer_test.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/README.md rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/README.md diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/main.py b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/main.py rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh 
b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/README.md b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/README.md rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/README.md diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/main.py b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/main.py rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/README.md b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/README.md rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/README.md diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/main.py 
b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/main.py rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/main.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/README.md b/examples/deprecated/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/README.md rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/README.md diff --git a/examples/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/main.py b/examples/deprecated/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/main.py rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/main.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_inception_v2/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh 
b/examples/deprecated/tensorflow/object_detection/tensorflow_models/prepare_dataset.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/prepare_dataset.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/prepare_dataset.sh diff --git a/examples/3.x_api/tensorflow/object_detection/requirements.txt b/examples/deprecated/tensorflow/object_detection/tensorflow_models/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/requirements.txt rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/requirements.txt diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/README.md b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/README.md rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/README.md diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/main.py b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/main.py rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/prepare_model.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/prepare_model.py diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet101/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/README.md b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/README.md rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/README.md diff --git 
a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/label_map.yaml b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/label_map.yaml similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/label_map.yaml rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/label_map.yaml diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/main.py b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/main.py rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/main.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet34/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/README.md b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/README.md rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/README.md diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/main.py b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/main.py similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/main.py rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/main.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/prepare_model.py b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/prepare_model.py 
similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/prepare_model.py rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/prepare_model.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/README.md b/examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/object_detection/yolo_v3/quantization/ptq/README.md rename to examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/README.md diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/coco_constants.py b/examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/coco_constants.py similarity index 100% rename from examples/tensorflow/object_detection/yolo_v3/quantization/ptq/coco_constants.py rename to examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/coco_constants.py diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/infer_detections.py b/examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/infer_detections.py similarity index 100% rename from examples/tensorflow/object_detection/yolo_v3/quantization/ptq/infer_detections.py rename to examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/infer_detections.py diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/prepare_dataset.sh b/examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/prepare_dataset.sh similarity index 100% rename from examples/tensorflow/object_detection/yolo_v3/quantization/ptq/prepare_dataset.sh rename to examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/prepare_dataset.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/requirements.txt b/examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/requirements.txt similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/requirements.txt rename to examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/object_detection/yolo_v3/quantization/ptq/run_benchmark.sh rename to 
examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/object_detection/yolo_v3/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/utils.py b/examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/utils.py similarity index 100% rename from examples/tensorflow/object_detection/yolo_v3/quantization/ptq/utils.py rename to examples/deprecated/tensorflow/object_detection/yolo_v3/quantization/ptq/utils.py diff --git a/examples/tensorflow/oob_models/quantization/ptq/README.md b/examples/deprecated/tensorflow/oob_models/quantization/ptq/README.md similarity index 100% rename from examples/tensorflow/oob_models/quantization/ptq/README.md rename to examples/deprecated/tensorflow/oob_models/quantization/ptq/README.md diff --git a/examples/tensorflow/oob_models/quantization/ptq/dataloaders.py b/examples/deprecated/tensorflow/oob_models/quantization/ptq/dataloaders.py similarity index 100% rename from examples/tensorflow/oob_models/quantization/ptq/dataloaders.py rename to examples/deprecated/tensorflow/oob_models/quantization/ptq/dataloaders.py diff --git a/examples/tensorflow/oob_models/quantization/ptq/find_outputs.py b/examples/deprecated/tensorflow/oob_models/quantization/ptq/find_outputs.py similarity index 100% rename from examples/tensorflow/oob_models/quantization/ptq/find_outputs.py rename to examples/deprecated/tensorflow/oob_models/quantization/ptq/find_outputs.py diff --git a/examples/tensorflow/oob_models/quantization/ptq/model_detail.py b/examples/deprecated/tensorflow/oob_models/quantization/ptq/model_detail.py similarity index 100% rename from examples/tensorflow/oob_models/quantization/ptq/model_detail.py rename to examples/deprecated/tensorflow/oob_models/quantization/ptq/model_detail.py diff --git a/examples/tensorflow/oob_models/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/oob_models/quantization/ptq/run_benchmark.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/tensorflow/oob_models/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/oob_models/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/oob_models/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/oob_models/quantization/ptq/run_quant.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/tensorflow/oob_models/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/oob_models/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py b/examples/deprecated/tensorflow/oob_models/quantization/ptq/tf_benchmark.py similarity index 100% rename from examples/tensorflow/oob_models/quantization/ptq/tf_benchmark.py rename to examples/deprecated/tensorflow/oob_models/quantization/ptq/tf_benchmark.py diff --git a/examples/tensorflow/oob_models/quantization/ptq/tf_savemodel_benchmark.py b/examples/deprecated/tensorflow/oob_models/quantization/ptq/tf_savemodel_benchmark.py similarity index 100% rename from examples/tensorflow/oob_models/quantization/ptq/tf_savemodel_benchmark.py rename to 
examples/deprecated/tensorflow/oob_models/quantization/ptq/tf_savemodel_benchmark.py diff --git a/examples/tensorflow/oob_models/quantization/ptq/utils.py b/examples/deprecated/tensorflow/oob_models/quantization/ptq/utils.py similarity index 100% rename from examples/tensorflow/oob_models/quantization/ptq/utils.py rename to examples/deprecated/tensorflow/oob_models/quantization/ptq/utils.py diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md b/examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md similarity index 88% rename from examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md rename to examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md index 7bff08a2f84..23ddfcaeb6a 100644 --- a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step +Step-by-Step (Deprecated) ============ This document is used to list steps of reproducing TensorFlow Wide & Deep tuning zoo result. @@ -41,7 +41,7 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] ### Install Additional Dependency packages ```shell -cd examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq +cd examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq pip install -r requirements.txt ``` @@ -87,6 +87,17 @@ Two .tfrecords files are generated and will be used later on: bash run_quant.sh --dataset_location=/path/to/datasets --input_model=/path/to/wide_deep_fp32_pretrained_model.pb --output_model=./wnd_int8_opt.pb ``` +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... 
+ ) +``` + ## Benchmark ``` bash run_benchmark.sh --dataset_location=/path/to/datasets --input_model=./wnd_int8_opt.pb --mode=accuracy --batch_size=500 diff --git a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py b/examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py similarity index 100% rename from examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py rename to examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/inference.py diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py b/examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py similarity index 100% rename from examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py rename to examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/preprocess_csv_tfrecords.py diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh similarity index 97% rename from examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh index 72ab01f2a19..85ae3005205 100644 --- a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh +++ b/examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh @@ -46,7 +46,7 @@ function define_mode { # run_tuning function run_benchmark { #numactl -N 0 -m 0 \ - python main.py \ + python inference.py \ --input_graph ${input_model} \ --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \ --batch_size ${batch_size} \ diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh similarity index 97% rename from examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh index a8068917a27..685aad45c63 100644 --- a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh +++ b/examples/deprecated/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh @@ -35,7 +35,7 @@ function init_params { # run_tuning function run_tuning { - python main.py \ + python inference.py \ --input_graph ${input_model} \ --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \ --calibration_data_location ${dataset_location}/train_processed_data.tfrecords \ diff --git 
a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md similarity index 80% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md index 4307ec85480..ebb765aad7c 100644 --- a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step +Step-by-Step (Deprecated) ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of 3dunet-mlperf. @@ -14,9 +14,9 @@ This example can run on Intel CPUs and GPUs. pip install neural-compressor ``` -### Install requirements +### Install Intel Tensorflow ```shell -pip install -r requirements.txt +pip install intel-tensorflow ``` > Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). @@ -59,18 +59,27 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] # Run command -Please set the following environment variables before running quantization or benchmark commands: - -* `export nnUNet_preprocessed=/build/preprocessed_data` -* `export nnUNet_raw_data_base=/build/raw_data` -* `export RESULTS_FOLDER=/build/result` ## Quantization -`bash run_quant.sh --input_model=3dunet_dynamic_ndhwc.pb --dataset_location=/build --output_model=3dunet_dynamic_ndhwc_int8.pb` + +### Quantization Config +The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... 
+ ) +``` + ## Benchmark -`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=100 --iters=500 --mode=benchmark` +* `export nnUNet_preprocessed=/build/preprocessed_data` +* `export nnUNet_raw_data_base=/build/raw_data` +* `export RESULTS_FOLDER=/build/result` +* `pip install -r requirements.txt` +* `python run_accuracy.py --input-model= --data-location= --calib-preprocess= --iters=100 --batch-size=1 --mode=benchmark --bfloat16 0` -`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=1 --mode=accuracy` diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/__init__.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/brats_cal_images_list.txt b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/brats_cal_images_list.txt rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/brats_cal_images_list.txt diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/Task043_BraTS_2019.py diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/__init__.py diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold0_validation.txt b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold0_validation.txt rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold0_validation.txt diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold1_validation.txt b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold1_validation.txt rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold1_validation.txt diff --git 
a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold2_validation.txt b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold2_validation.txt rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold2_validation.txt diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold3_validation.txt b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold3_validation.txt rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold3_validation.txt diff --git a/examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold4_validation.txt b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt similarity index 100% rename from examples/pytorch/image_recognition/3d-unet/quantization/ptq/fx/folds/fold4_validation.txt rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/folds/fold4_validation.txt diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/postprocess.py diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/preprocess.py diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/nnUNet/setup.py diff --git a/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt new file mode 100644 index 00000000000..4e85853747e --- /dev/null +++ b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt @@ -0,0 +1 @@ +nnunet diff --git a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py 
b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py similarity index 100% rename from examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_accuracy.py diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh similarity index 91% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh index 36f8d8502f0..fd466c5f8d0 100644 --- a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh +++ b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh @@ -31,9 +31,6 @@ function init_params { --batch_size=*) batch_size=$(echo $var |cut -f2 -d=) ;; - --iters=*) - iters=$(echo $var |cut -f2 -d=) - ;; *) echo "Error: No such parameter: ${var}" exit 1 @@ -52,13 +49,12 @@ function run_benchmark { extra_cmd="" fi - python main.py \ + python run_accuracy.py \ --input-model=${input_model} \ --data-location=${dataset_location} \ --calib-preprocess=${BUILD_DIR}/calib_preprocess \ --batch-size=${batch_size} \ --mode=${mode} \ - --iters=${iters} \ ${extra_cmd} } diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh similarity index 97% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh index 79256545613..749b0c09c2b 100644 --- a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh +++ b/examples/deprecated/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh @@ -37,7 +37,7 @@ function init_params { # run_tuning function run_tuning { - python main.py \ + python run_accuracy.py \ --input-model=${input_model} \ --output-model=${output_model} \ --data-location=${dataset_location} \ diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md similarity index 76% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md index 6fa291d0b36..588ed699819 100644 --- a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md +++ b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step +Step-by-Step (Deprecated) ============ This document is used to list steps of reproducing TensorFlow style transfer Intel® Neural Compressor tuning zoo result. 
@@ -17,7 +17,7 @@ pip install neural-compressor ```shell pip install intel-tensorflow ``` -> Note: Supported Tensorflow [Version](../../../../../../README.md#supported-frameworks). +> Note: Supported Tensorflow [Version](../../../../../../../README.md#supported-frameworks). ### Install Additional Dependency packages ```shell @@ -61,7 +61,7 @@ optional arguments: ```shell wget https://storage.googleapis.com/download.magenta.tensorflow.org/models/arbitrary_style_transfer.tar.gz -tar -xvzf arbitrary_style_transfer.tar.gz +tar -xvzf arbitrary_style_transfer.tar.gz ./model ``` ### 3. Prepare Dataset @@ -70,12 +70,22 @@ There are two folders named style_images and content_images in current folder. P # Run Command ```shell - python main.py --output_dir=./result --style_images_paths=./style_images --content_images_paths=./content_images --input_model=./model/model.ckpt + python style_tune.py --output_dir=./result --style_images_paths=./style_images --content_images_paths=./content_images --input_model=./model/model.ckpt ``` ## Quantization Config +The Quantization Config class provides default parameter settings for running on Intel CPUs. To run this example on Intel GPUs, set the 'backend' parameter to 'itex' and the 'device' parameter to 'gpu'. + +``` +config = PostTrainingQuantConfig( + device="gpu", + backend="itex", + ... + ) +``` + ## Quantization ```shell bash run_quant.sh --dataset_location=style_images/,content_images/ --input_model=./model/model.ckpt --output_model=saved_model @@ -109,9 +119,13 @@ Here we set the input tensor and output tensors name into *inputs* and *outputs* After prepare step is done, we just need add 2 lines to get the quantized model. ```python -from neural_compressor.tensorflow import StaticQuantConfig, quantize_model - -quant_config = StaticQuantConfig() -q_model = quantize_model(graph, quant_config, calib_dataloader) -q_model.save(FLAGS.output_model) +from neural_compressor import quantization +from neural_compressor.config import PostTrainingQuantConfig +conf = PostTrainingQuantConfig(inputs=['style_input', 'content_input'], + outputs=['transformer/expand/conv3/conv/Sigmoid'], + calibration_sampling_size=[50, 100]) +quantized_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=dataloader, + eval_dataloader=dataloader) ``` + +The Intel® Neural Compressor quantization.fit() function returns the best quantized model found within the timeout constraint.
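As a companion to the README snippet above, the following is a minimal, self-contained sketch of the same 2.x-style PTQ flow. It is an illustration only: the calibration dataloader, the 256x256x3 input shapes, and the random calibration data are assumptions, while the real example builds its dataloader from the style_images/ and content_images/ folders inside style_tune.py.

```python
# Hypothetical end-to-end sketch of the PTQ flow shown in the README above.
# Assumptions: 256x256x3 inputs, random calibration data, and a user-defined
# dataloader (INC 2.x accepts any iterable that exposes a `batch_size` attribute).
import numpy as np

from neural_compressor import quantization
from neural_compressor.config import PostTrainingQuantConfig


class CalibDataloader:
    """Yields ((style_batch, content_batch), label) pairs for calibration."""

    def __init__(self, batch_size=1, length=10):
        self.batch_size = batch_size
        self.length = length

    def __iter__(self):
        for _ in range(self.length):
            style = np.random.rand(self.batch_size, 256, 256, 3).astype(np.float32)
            content = np.random.rand(self.batch_size, 256, 256, 3).astype(np.float32)
            yield (style, content), None  # inputs follow the order given by `inputs=...`


conf = PostTrainingQuantConfig(
    inputs=["style_input", "content_input"],
    outputs=["transformer/expand/conv3/conv/Sigmoid"],
    calibration_sampling_size=[50, 100],
    # device="gpu", backend="itex",  # uncomment to target Intel GPUs via ITEX
)

q_model = quantization.fit(
    "./model/model.ckpt",  # model path from the README above
    conf=conf,
    calib_dataloader=CalibDataloader(),
)
q_model.save("./saved_model")
```

For accuracy-aware tuning, an `eval_dataloader` (or an `eval_func`) can additionally be passed to `quantization.fit`, as the README snippet does.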
diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg similarity index 100% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/colva_beach_sq.jpg diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg similarity index 100% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/content_images/golden_gate_sq.jpg diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py similarity index 100% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/prepare_model.py diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh similarity index 97% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh index 41fee820958..08de33cba5b 100644 --- a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh +++ b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh @@ -48,7 +48,7 @@ function run_benchmark { content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}') echo "$style_images, $content_images" - python main.py \ + python style_tune.py \ --input_model "${input_model}" \ --style_images_paths "${style_images}" \ --content_images_paths "${content_images}" \ diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh similarity index 97% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh index 
4fdfdd2e8a5..d5adc1060bb 100644 --- a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh +++ b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh @@ -38,7 +38,7 @@ function run_tuning { content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}') echo "$style_images, $content_images" - python main.py \ + python style_tune.py \ --input_model "${input_model}" \ --style_images_paths "${style_images}" \ --content_images_paths "${content_images}" \ diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg similarity index 100% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/kanagawa_great_wave.jpg diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg similarity index 100% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_images/zigzag_colorful.jpg diff --git a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py b/examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py similarity index 100% rename from examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py rename to examples/deprecated/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/style_tune.py diff --git a/examples/helloworld/README.md b/examples/helloworld/README.md deleted file mode 100644 index a4251f4b4b9..00000000000 --- a/examples/helloworld/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# Hello World Examples - -## PyTorch Examples -### Examples List -* [torch_woq](/examples/helloworld/torch_woq): apply the weight-only quantization to LLMs. -* [torch_static_quant](/examples/helloworld/torch_static_quant): apply the static quantization to non-LLMs. - -## Tensorflow Examples (Deprecated) -### Prerequisite -Enter the following commands to prepare a dataset and pretrained models for the included Hello World examples: - -```shell -pip install intel-tensorflow==2.10.0 -python train.py -``` -The `train.py` script generates a saved model and a frozen pb at ./models for your use. -> Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). - -### Examples List -* [tf_example1](/examples/helloworld/tf_example1): quantize with built-in dataloader and metric. -* [tf_example2](/examples/helloworld/tf_example2): quantize keras model with customized metric and dataloader. -* [tf_example3](/examples/helloworld/tf_example3): convert model with mix precision. -* [tf_example4](/examples/helloworld/tf_example4): quantize checkpoint with dummy dataloader. -* [tf_example5](/examples/helloworld/tf_example5): configure performance and accuracy measurement. 
-* [tf_example6](/examples/helloworld/tf_example6): use default user-facing APIs to quantize a pb model. -* [tf_example7](/examples/helloworld/tf_example7): quantize model with dummy dataset. diff --git a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json b/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json deleted file mode 100644 index 52a4ce513f4..00000000000 --- a/examples/onnxrt/nlp/huggingface_model/text_generation/llama/quantization/ptq_static/prompt.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "32": "Once upon a time, there existed a little girl who liked to have adventures. She wanted to go to places and meet new people, and have fun", - "512": "It is done, and submitted. You can play 'Survival of the Tastiest' on Android, and on the web. Playing on the web works, but you have to simulate multiple touch for table moving and that can be a bit confusing. There is a lot I'd like to talk about. I will go through every topic, insted of making the typical what went right/wrong list. Concept Working over the theme was probably one of the hardest tasks which I had to face. Originally, I had an idea of what kind of game I wanted to develop, gameplay wise - something with a lot of enemies/actors, simple graphics, maybe set in space, controlled from a top-down view. I was confident that I could fit any theme around it. In the end, the problem with a theme like 'Evolution' in a game is that evolution is unassisted. It happens through several seemingly random mutations over time, with the most apt permutation surviving. This genetic car simulator is, in my opinion, a great example of actual evolution of a species facing a challenge. But is it a game? In a game, you need to control something to reach an objective. That control goes against what evolution is supposed to be like. If you allow the user to pick how to evolve something, it's not evolution anymore - it's the equivalent of intelligent design, the fable invented by creationists to combat the idea of evolution. Being agnostic and a Pastafarian, that's not something that rubbed me the right way. Hence, my biggest dillema when deciding what to create was not with what I wanted to create, but with what I did not. I didn't want to create an 'intelligent design' simulator and wrongly call it evolution. This is a problem, of course, every other contestant also had to face. And judging by the entries submitted, not many managed to work around it. I'd say the only real solution was through the use of artificial selection, somehow. So far, I have not seen any entry using this at its core gameplay. Alas, this is just a fun competition and after a while I decided not to be as strict with the game idea, and allowed myself to pick whatever I thought would work out. My initial idea was to create something where humanity tried to evolve to a next level", - "1024": "It is done, and submitted. You can play 'Survival of the Tastiest' on Android, and on the web. Playing on the web works, but you have to simulate multiple touch for table moving and that can be a bit confusing. There is a lot I'd like to talk about. I will go through every topic, insted of making the typical what went right/wrong list. Concept Working over the theme was probably one of the hardest tasks which I had to face. 
Originally, I had an idea of what kind of game I wanted to develop, gameplay wise - something with a lot of enemies/actors, simple graphics, maybe set in space, controlled from a top-down view. I was confident that I could fit any theme around it. In the end, the problem with a theme like 'Evolution' in a game is that evolution is unassisted. It happens through several seemingly random mutations over time, with the most apt permutation surviving. This genetic car simulator is, in my opinion, a great example of actual evolution of a species facing a challenge. But is it a game? In a game, you need to control something to reach an objective. That control goes against what evolution is supposed to be like. If you allow the user to pick how to evolve something, it's not evolution anymore - it's the equivalent of intelligent design, the fable invented by creationists to combat the idea of evolution. Being agnostic and a Pastafarian, that's not something that rubbed me the right way. Hence, my biggest dillema when deciding what to create was not with what I wanted to create, but with what I did not. I didn't want to create an 'intelligent design' simulator and wrongly call it evolution. This is a problem, of course, every other contestant also had to face. And judging by the entries submitted, not many managed to work around it. I'd say the only real solution was through the use of artificial selection, somehow. So far, I haven't seen any entry using this at its core gameplay. Alas, this is just a fun competition and after a while I decided not to be as strict with the game idea, and allowed myself to pick whatever I thought would work out. My initial idea was to create something where humanity tried to evolve to a next level, but had some kind of foe trying to stop them from doing so. I kind of had this image of human souls flying in space towards a monolith or a space baby (all based in 2001: A Space Odyssey of course) but I couldn't think of compelling (read: serious) mechanics for that. Borgs were my next inspiration, as their whole hypothesis fit pretty well into the evolution theme. But how to make it work? Are you the borg, or fighting the Borg? The third and final idea came to me through my girlfriend, who somehow gave me the idea of making something about the evolution of Pasta. The more I thought about it the more it sounded like it would work, so I decided to go with it. Conversations with my inspiring co-worker Roushey (who also created the 'Mechanical Underdogs' signature logo for my intros) further matured the concept, as it involved into the idea of having individual pieces of pasta flying around and trying to evolve until they became all-powerful. A secondary idea here was that the game would work to explain how the Flying Spaghetti Monster came to exist - by evolving from a normal dinner table. So the idea evolved more or less into this: you are sitting a table. You have your own plate, with is your 'base'. There are 5 other guests at the table, each with their own plate. Your plate can spawn little pieces of pasta. You do so by 'ordering' them through a menu. Some pastas are better than others; some are faster, some are stronger. They have varying 'costs', which are debited from your credits (you start with a number of credits). Once spawned, your pastas start flying around. Their instinct is to fly to other plates, in order to conquer them (the objective of the game is having your pasta conquer all the plates on the table). 
But they are really autonomous, so after being spawned, you have no control over your pasta (think DotA or LoL creeps). Your pasta doesn't like other people's pasta, so if they meet, they shoot sauce at each other until one dies. You get credits for other pastas your own pasta kill. Once a pasta is in vicinity of a plate", - "2017": "It is done, and submitted. You can play 'Survival of the Tastiest' on Android, and on the web. Playing on the web works, but you have to simulate multiple touch for table moving and that can be a bit confusing. There is a lot I'd like to talk about. I will go through every topic, insted of making the typical what went right/wrong list. Concept Working over the theme was probably one of the hardest tasks which I had to face. Originally, I had an idea of what kind of game I wanted to develop, gameplay wise - something with a lot of enemies/actors, simple graphics, maybe set in space, controlled from a top-down view. I was confident that I could fit any theme around it. In the end, the problem with a theme like 'Evolution' in a game is that evolution is unassisted. It happens through several seemingly random mutations over time, with the most apt permutation surviving. This genetic car simulator is, in my opinion, a great example of actual evolution of a species facing a challenge. But is it a game? In a game, you need to control something to reach an objective. That control goes against what evolution is supposed to be like. If you allow the user to pick how to evolve something, it's not evolution anymore - it's the equivalent of intelligent design, the fable invented by creationists to combat the idea of evolution. Being agnostic and a Pastafarian, that's not something that rubbed me the right way. Hence, my biggest dillema when deciding what to create was not with what I wanted to create, but with what I did not. I didn't want to create an 'intelligent design' simulator and wrongly call it evolution. This is a problem, of course, every other contestant also had to face. And judging by the entries submitted, not many managed to work around it. I'd say the only real solution was through the use of artificial selection, somehow. So far, I have not seen any entry using this at its core gameplay. Alas, this is just a fun competition and after a while I decided not to be as strict with the game idea, and allowed myself to pick whatever I thought would work out. My initial idea was to create something where humanity tried to evolve to a next level but had some kind of foe trying to stop them from doing so. I kind of had this image of human souls flying in space towards a monolith or a space baby (all based in 2001: A Space Odyssey of course) but I couldn't think of compelling (read: serious) mechanics for that. Borgs were my next inspiration, as their whole hypothesis fit pretty well into the evolution theme. But how to make it work? Are you the borg, or fighting the Borg? The third and final idea came to me through my girlfriend, who somehow gave me the idea of making something about the evolution of Pasta. The more I thought about it the more it sounded like it would work, so I decided to go with it. Conversations with my inspiring co-worker Roushey (who also created the 'Mechanical Underdogs' signature logo for my intros) further matured the concept, as it involved into the idea of having individual pieces of pasta flying around and trying to evolve until they became all-powerful. 
A secondary idea here was that the game would work to explain how the Flying Spaghetti Monster came to exist - by evolving from a normal dinner table. So the idea evolved more or less into this: you are sitting a table. You have your own plate, with is your 'base'. There are 5 other guests at the table, each with their own plate. Your plate can spawn little pieces of pasta. You do so by 'ordering' them through a menu. Some pastas are better than others; some are faster, some are stronger. They have varying 'costs', which are debited from your credits (you start with a number of credits). Once spawned, your pastas start flying around. Their instinct is to fly to other plates, in order to conquer them (the objective of the game is having your pasta conquer all the plates on the table). But they are really autonomous, so after being spawned, you have no control over your pasta (think DotA or LoL creeps). Your pasta doesn't like other people's pasta, so if they meet, they shoot sauce at each other until one dies. You get credits for other pastas your own pasta kill. Once a pasta is in the vicinity of a plate, it starts conquering it for its team. It takes around 10 seconds for a plate to be conquered; less if more pasta from the same team are around. If pasta from other team are around, though, they get locked down in their attempt, unable to conquer the plate, until one of them die (think Battlefield's standard 'Conquest' mode). You get points every second for every plate you own. Over time, the concept also evolved to use an Italian bistro as its main scenario. Carlos, Carlos' Bistro's founder and owner Setup No major changes were made from my work setup. I used FDT and Starling creating an Adobe AIR (ActionScript) project, all tools or frameworks I already had some knowledge with. One big change for me was that I livestreamed my work through a twitch.tv account. This was a new thing for me. As recommended by Roushey, I used a program called XSplit and I got to say, it is pretty amazing. It made the livestream pretty effortless and the features are awesome, even for the free version. It was great to have some of my friends watch me, and then interact with them and random people through chat. It was also good knowing that I was also recording a local version of the files, so I could make a timelapse video later. Knowing the video was being recorded also made me a lot more self-conscious about my computer use, as if someone was watching over my shoulder. It made me realize that sometimes I spend too much time in seemingly inane tasks (I ended up wasting the longest time just to get some text alignment the way I wanted - it'll probably drive someone crazy if they watch it) and that I do way too many typos where writing code. I pretty much spend half of the time writing a line and the other half fixing the crazy characters in it. My own stream was probably boring to watch since I was coding for the most time. But livestreaming is one of the cool things to do as a spectator too. It was great seeing other people working - I had a few tabs opened on my second monitor all the time. It's actually a bit sad, because if I could, I could have spent the whole weekend just watching other people working! But I had to do my own work, so I'd only do it once in a while, when resting for a bit. Design Although I wanted some simple, low-fi, high-contrast kind of design, I ended up going with somewhat realistic (vector) art. I think it worked very well, fitting the mood of the game, but I also went overboard. 
For example: to know the state of a plate (who owns it, who's conquering it and how much time they have left before conquering it, which pasta units are in the queue, etc), you have to look at the plate's bill. The problem I realized when doing some tests is that people never look at the bill! They think it's some kind of prop, so they never actually read its details. Plus, if you're zoomed out too much, you can't actually read it, so it's hard to know what's going on with the game until you zoom in to the area of a specific plate. One other solution that didn't turn out to be as perfect as I thought was how to indicate who a plate base belongs to. In the game, that's indicated by the plate's decoration - its color denotes the team owner. But it's something that fits so well into the design that people never realized it, until they were told about it. In the end, the idea of going with a full physical metaphor is one that should be done with care. Things that are very important risk becoming background noise, unless the player knows its importance. Originally, I wanted to avoid any kind of heads-up display in my game. In the end, I ended up adding it at the bottom to indicate your credits and bases owned, as well as the hideous out-of-place-and-still-not-obvious 'Call Waiter' button. But in hindsight, I should have gone with a simple HUD from the start, especially one that indicated each team's colors and general state of the game without the need for zooming in and out. Development Development went fast. But not fast enough. Even though I worked around 32+ hours for this Ludum Dare, the biggest problem that I had to face in the end was overscoping. I had too much planned,", - "2048": "It is done, and submitted. You can play 'Survival of the Tastiest' on Android, and on the web. Playing on the web works, but you have to simulate multiple touch for table moving and that can be a bit confusing. There is a lot I'd like to talk about. I will go through every topic, insted of making the typical what went right/wrong list. Concept Working over the theme was probably one of the hardest tasks which I had to face. Originally, I had an idea of what kind of game I wanted to develop, gameplay wise - something with a lot of enemies/actors, simple graphics, maybe set in space, controlled from a top-down view. I was confident that I could fit any theme around it. In the end, the problem with a theme like 'Evolution' in a game is that evolution is unassisted. It happens through several seemingly random mutations over time, with the most apt permutation surviving. This genetic car simulator is, in my opinion, a great example of actual evolution of a species facing a challenge. But is it a game? In a game, you need to control something to reach an objective. That control goes against what evolution is supposed to be like. If you allow the user to pick how to evolve something, it's not evolution anymore - it's the equivalent of intelligent design, the fable invented by creationists to combat the idea of evolution. Being agnostic and a Pastafarian, that's not something that rubbed me the right way. Hence, my biggest dillema when deciding what to create was not with what I wanted to create, but with what I did not. I didn't want to create an 'intelligent design' simulator and wrongly call it evolution. This is a problem, of course, every other contestant also had to face. And judging by the entries submitted, not many managed to work around it. 
I'd say the only real solution was through the use of artificial selection, somehow. So far, I have not seen any entry using this at its core gameplay. Alas, this is just a fun competition and after a while I decided not to be as strict with the game idea, and allowed myself to pick whatever I thought would work out. My initial idea was to create something where humanity tried to evolve to a next level but had some kind of foe trying to stop them from doing so. I kind of had this image of human souls flying in space towards a monolith or a space baby (all based in 2001: A Space Odyssey of course) but I couldn't think of compelling (read: serious) mechanics for that. Borgs were my next inspiration, as their whole hypothesis fit pretty well into the evolution theme. But how to make it work? Are you the borg, or fighting the Borg? The third and final idea came to me through my girlfriend, who somehow gave me the idea of making something about the evolution of Pasta. The more I thought about it the more it sounded like it would work, so I decided to go with it. Conversations with my inspiring co-worker Roushey (who also created the 'Mechanical Underdogs' signature logo for my intros) further matured the concept, as it involved into the idea of having individual pieces of pasta flying around and trying to evolve until they became all-powerful. A secondary idea here was that the game would work to explain how the Flying Spaghetti Monster came to exist - by evolving from a normal dinner table. So the idea evolved more or less into this: you are sitting a table. You have your own plate, with is your 'base'. There are 5 other guests at the table, each with their own plate. Your plate can spawn little pieces of pasta. You do so by 'ordering' them through a menu. Some pastas are better than others; some are faster, some are stronger. They have varying 'costs', which are debited from your credits (you start with a number of credits). Once spawned, your pastas start flying around. Their instinct is to fly to other plates, in order to conquer them (the objective of the game is having your pasta conquer all the plates on the table). But they are really autonomous, so after being spawned, you have no control over your pasta (think DotA or LoL creeps). Your pasta doesn't like other people's pasta, so if they meet, they shoot sauce at each other until one dies. You get credits for other pastas your own pasta kill. Once a pasta is in the vicinity of a plate, it starts conquering it for its team. It takes around 10 seconds for a plate to be conquered; less if more pasta from the same team are around. If pasta from other team are around, though, they get locked down in their attempt, unable to conquer the plate, until one of them die (think Battlefield's standard 'Conquest' mode). You get points every second for every plate you own. Over time, the concept also evolved to use an Italian bistro as its main scenario. Carlos, Carlos' Bistro's founder and owner Setup No major changes were made from my work setup. I used FDT and Starling creating an Adobe AIR (ActionScript) project, all tools or frameworks I already had some knowledge with. One big change for me was that I livestreamed my work through a twitch.tv account. This was a new thing for me. As recommended by Roushey, I used a program called XSplit and I got to say, it is pretty amazing. It made the livestream pretty effortless and the features are awesome, even for the free version. 
It was great to have some of my friends watch me, and then interact with them and random people through chat. It was also good knowing that I was also recording a local version of the files, so I could make a timelapse video later. Knowing the video was being recorded also made me a lot more self-conscious about my computer use, as if someone was watching over my shoulder. It made me realize that sometimes I spend too much time in seemingly inane tasks (I ended up wasting the longest time just to get some text alignment the way I wanted - it'll probably drive someone crazy if they watch it) and that I do way too many typos where writing code. I pretty much spend half of the time writing a line and the other half fixing the crazy characters in it. My own stream was probably boring to watch since I was coding for the most time. But livestreaming is one of the cool things to do as a spectator too. It was great seeing other people working - I had a few tabs opened on my second monitor all the time. It's actually a bit sad, because if I could, I could have spent the whole weekend just watching other people working! But I had to do my own work, so I'd only do it once in a while, when resting for a bit. Design Although I wanted some simple, low-fi, high-contrast kind of design, I ended up going with somewhat realistic (vector) art. I think it worked very well, fitting the mood of the game, but I also went overboard. For example: to know the state of a plate (who owns it, who's conquering it and how much time they have left before conquering it, which pasta units are in the queue, etc), you have to look at the plate's bill. The problem I realized when doing some tests is that people never look at the bill! They think it's some kind of prop, so they never actually read its details. Plus, if you're zoomed out too much, you can't actually read it, so it's hard to know what's going on with the game until you zoom in to the area of a specific plate. One other solution that didn't turn out to be as perfect as I thought was how to indicate who a plate base belongs to. In the game, that's indicated by the plate's decoration - its color denotes the team owner. But it's something that fits so well into the design that people never realized it, until they were told about it. In the end, the idea of going with a full physical metaphor is one that should be done with care. Things that are very important risk becoming background noise, unless the player knows its importance. Originally, I wanted to avoid any kind of heads-up display in my game. In the end, I ended up adding it at the bottom to indicate your credits and bases owned, as well as the hideous out-of-place-and-still-not-obvious 'Call Waiter' button. But in hindsight, I should have gone with a simple HUD from the start, especially one that indicated each team's colors and general state of the game without the need for zooming in and out. Development Development went fast. But not fast enough. Even though I worked around 32+ hours for this Ludum Dare, the biggest problem that I had to face in the end was overscoping. I had too much planned, and couldn't get it all done. 
Content-wise, I had several kinds of pasta planned - Wikipedia is just amazing in that regard," -} diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py deleted file mode 100644 index 1e88f8abad8..00000000000 --- a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_label_map.py +++ /dev/null @@ -1,103 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# - -"""The dict mapping category IDs to its names of labels.""" - -category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' -} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py deleted file mode 100644 index 2c57fd61302..00000000000 --- a/examples/onnxrt/object_detection/onnx_model_zoo/tiny_yolov3/quantization/ptq_static/coco_tools.py +++ /dev/null @@ -1,672 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Wrappers for third party pycocotools to be used within object_detection. -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. -TODO(jonathanhuang): wrap as a slim metric in metrics.py -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() -""" - -import copy -import time - -import numpy as np - -from collections import OrderedDict -from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class. - - Attributes: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - """ - - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): - """Construct a COCOWrapper. - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations: list) -> coco.COCO: - """Load annotations dictionary into COCO datastructure. - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. - Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. 
- Returns: - a coco.COCO datastructure holding object detection annotations results - Raises: - ValueError: if (1) annotations is not a list or annotations do not - correspond to the images contained in self. - """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - logger.info("Load and prepare annotation results.") - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): - """Construct a COCOEvalWrapper. - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. - iou_thrs: Minimal value for intersection over union that allows to - make decision that prediction bounding box is true positive. - You can specify one float value between 0 to 1 or - string "05:0.05:0.95" for standard COCO thresholds. - iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. 
- """ - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) - elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] - - if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) - if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) - if map_points == 0: - self.params.recThrs = [-1] - - - def GetCategory(self, category_id: int) -> dict: - """Fetch dictionary holding category information given category id. - Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self) -> bool: - """Return whether COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self) -> List[int]: - """Return the list of IDs of all valid categories.""" - return self.params.catIds - - def accumulate(self, p: cocoeval.Params = None): - """Accumulate evaluation results per image and store it to self.eval. - - Args: - p: input params for evaluation - """ - print('Accumulating evaluation results...') - tic = time.time() - if not self.evalImgs: - print('Please run evaluate() first') - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) - - # create dictionary for future indexing - _pe = self._paramsEval - print('-pe', _pe) - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 - for a, a0 in enumerate(a_list): - Na = a0*I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
- inds = np.argsort(-dtScores, kind='mergesort') - dtScoresSorted = dtScores[inds] - - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) - - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) - - # calculate precision - if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) - else: - q = np.zeros((R,)) - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() - - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - - # calculate recall - if nd: - recall[t,k,a,m] = rc[-1] - else: - recall[t,k,a,m] = 0 - - # calculate score - ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] - except: - pass - scores[t,:,k,a,m] = np.array(ss) - # exit(0) - self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, - } - toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - - - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover - """Compute detection metrics. - Args: - include_metrics_per_category: Whether include metrics per category. - all_metrics_per_category: Whether include all the summery metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. 
- Returns: - A tuple of (summary_metrics, per_category_ap), in which - (1) summary_metrics is a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments; - 'Precision/mAP@.50IOU': mean average precision at 50% IOU; - 'Precision/mAP@.75IOU': mean average precision at 75% IOU; - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels); - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels); - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels); - 'Recall/AR@1': average recall with 1 detection; - 'Recall/AR@10': average recall with 10 detections; - 'Recall/AR@100': average recall with 100 detections; - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections; - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections; - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; - and (2) per_category_ap is a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. If evaluating class-agnostic - mode, per_category_ap is an empty dictionary. - Raises: - ValueError: If category_stats does not exist. - """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - 
per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - Args: - box: a numpy array in format of [ymin, xmin, ymax, xmax] - Returns: - A list of floats, in COCO format, representing [xmin, ymin, width, height] - """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - Returns: - A pycocotools Run-length encoding of the mask. - """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: - """Export groundtruth of a single image to COCO format. - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - Returns: - A list of groundtruth annotations for a single image in the COCO format. 
- Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detections of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to the - ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in - correspondence - that is: boxes[i, :], and classes[i] - are associated with the same groundtruth annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scored for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - Returns: - A list of detection annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. 
- """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) - return detections_list - - -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detection masks of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - Returns: - A list of detection mask annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) - return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py deleted file mode 100644 index 1e88f8abad8..00000000000 --- a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_label_map.py +++ /dev/null @@ -1,103 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# - -"""The dict mapping category IDs to its names of labels.""" - -category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' -} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py deleted file mode 100644 index 2c57fd61302..00000000000 --- a/examples/onnxrt/object_detection/onnx_model_zoo/yolov3/quantization/ptq_static/coco_tools.py +++ /dev/null @@ -1,672 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
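The deleted coco_label_map.py above uses the official COCO detection category IDs, which are deliberately non-contiguous: 80 classes spread over IDs 1-90. A quick sanity check against the dict as shown (assuming it has been imported as category_map):

missing = sorted(set(range(1, 91)) - set(category_map))
print(len(category_map), missing)  # 80 [12, 26, 29, 30, 45, 66, 68, 69, 71, 83]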
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Wrappers for third party pycocotools to be used within object_detection. -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. -TODO(jonathanhuang): wrap as a slim metric in metrics.py -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() -""" - -import copy -import time - -import numpy as np - -from collections import OrderedDict -from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class. - - Attributes: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - """ - - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): - """Construct a COCOWrapper. - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations: list) -> coco.COCO: - """Load annotations dictionary into COCO datastructure. - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. 
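The module docstring above outlines the intended evaluation flow. A minimal sketch of that flow using only the per-image exporters defined in this file; it assumes the deleted module is still importable as coco_tools (e.g., from a checkout that predates this change), and every ID, box, and score below is a made-up placeholder.

import numpy as np
from coco_tools import (COCOEvalWrapper, COCOWrapper,
                        ExportSingleImageDetectionBoxesToCoco,
                        ExportSingleImageGroundtruthToCoco)

category_id_set = {1}
gt = ExportSingleImageGroundtruthToCoco(
    image_id=1, next_annotation_id=1, category_id_set=category_id_set,
    groundtruth_boxes=np.array([[10., 20., 110., 220.]], dtype=np.float32),
    groundtruth_classes=np.array([1], dtype=np.int32))
dt = ExportSingleImageDetectionBoxesToCoco(
    image_id=1, category_id_set=category_id_set,
    detection_boxes=np.array([[12., 22., 108., 218.]], dtype=np.float32),
    detection_scores=np.array([0.9], dtype=np.float32),
    detection_classes=np.array([1], dtype=np.int32))
groundtruth = COCOWrapper({'images': [{'id': 1}],
                           'categories': [{'id': 1, 'name': 'person'}],
                           'annotations': gt})
detections = groundtruth.LoadAnnotations(dt)
evaluator = COCOEvalWrapper(groundtruth, detections,
                            iou_thrs='0.5:0.05:0.95', map_points=101)
summary_metrics, _ = evaluator.ComputeMetrics()
print(summary_metrics['Precision/mAP'])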
- Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. - Returns: - a coco.COCO datastructure holding object detection annotations results - Raises: - ValueError: if (1) annotations is not a list or annotations do not - correspond to the images contained in self. - """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - logger.info("Load and prepare annotation results.") - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): - """Construct a COCOEvalWrapper. - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. - iou_thrs: Minimal value for intersection over union that allows to - make decision that prediction bounding box is true positive. - You can specify one float value between 0 to 1 or - string "05:0.05:0.95" for standard COCO thresholds. - iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. 
- """ - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) - elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] - - if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) - if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) - if map_points == 0: - self.params.recThrs = [-1] - - - def GetCategory(self, category_id: int) -> dict: - """Fetch dictionary holding category information given category id. - Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self) -> bool: - """Return whether COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self) -> List[int]: - """Return the list of IDs of all valid categories.""" - return self.params.catIds - - def accumulate(self, p: cocoeval.Params = None): - """Accumulate evaluation results per image and store it to self.eval. - - Args: - p: input params for evaluation - """ - print('Accumulating evaluation results...') - tic = time.time() - if not self.evalImgs: - print('Please run evaluate() first') - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) - - # create dictionary for future indexing - _pe = self._paramsEval - print('-pe', _pe) - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 - for a, a0 in enumerate(a_list): - Na = a0*I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
- inds = np.argsort(-dtScores, kind='mergesort') - dtScoresSorted = dtScores[inds] - - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) - - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) - - # calculate precision - if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) - else: - q = np.zeros((R,)) - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() - - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - - # calculate recall - if nd: - recall[t,k,a,m] = rc[-1] - else: - recall[t,k,a,m] = 0 - - # calculate score - ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] - except: - pass - scores[t,:,k,a,m] = np.array(ss) - # exit(0) - self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, - } - toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - - - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover - """Compute detection metrics. - Args: - include_metrics_per_category: Whether include metrics per category. - all_metrics_per_category: Whether include all the summery metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. 
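In the map_points == 0 path of accumulate (the R == 1 branch above), average precision is computed as the area under the precision-recall curve after applying the precision envelope. A small numeric check mirroring that branch, with made-up recall/precision values:

import numpy as np
rc = np.array([0.2, 0.2, 0.6, 1.0])  # made-up recall values
pr = np.array([1.0, 0.7, 0.5, 0.4])  # made-up matching precision values
rc = np.concatenate(([0.], rc, [1.]))
pr = np.concatenate(([0.], pr, [0.]))
for i in range(pr.size - 1, 0, -1):   # precision envelope: make precision non-increasing in recall
    pr[i - 1] = np.maximum(pr[i - 1], pr[i])
change_point = np.where(rc[1:] != rc[:-1])[0]  # points where recall changes
res = np.sum((rc[change_point + 1] - rc[change_point]) * pr[change_point + 1])
print(res)  # ~0.56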
- Returns: - A tuple of (summary_metrics, per_category_ap), in which - (1) summary_metrics is a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments; - 'Precision/mAP@.50IOU': mean average precision at 50% IOU; - 'Precision/mAP@.75IOU': mean average precision at 75% IOU; - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels); - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels); - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels); - 'Recall/AR@1': average recall with 1 detection; - 'Recall/AR@10': average recall with 10 detections; - 'Recall/AR@100': average recall with 100 detections; - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections; - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections; - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; - and (2) per_category_ap is a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. If evaluating class-agnostic - mode, per_category_ap is an empty dictionary. - Raises: - ValueError: If category_stats does not exist. - """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - 
per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - Args: - box: a numpy array in format of [ymin, xmin, ymax, xmax] - Returns: - A list of floats, in COCO format, representing [xmin, ymin, width, height] - """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - Returns: - A pycocotools Run-length encoding of the mask. - """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: - """Export groundtruth of a single image to COCO format. - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - Returns: - A list of groundtruth annotations for a single image in the COCO format. 
- Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detections of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to the - ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in - correspondence - that is: boxes[i, :], and classes[i] - are associated with the same groundtruth annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scored for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - Returns: - A list of detection annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. 
- """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) - return detections_list - - -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detection masks of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - Returns: - A list of detection mask annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) - return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py deleted file mode 100644 index 1e88f8abad8..00000000000 --- a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_label_map.py +++ /dev/null @@ -1,103 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# - -"""The dict mapping category IDs to its names of labels.""" - -category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' -} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py deleted file mode 100644 index 2c57fd61302..00000000000 --- a/examples/onnxrt/object_detection/onnx_model_zoo/yolov4/quantization/ptq_static/coco_tools.py +++ /dev/null @@ -1,672 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Wrappers for third party pycocotools to be used within object_detection. -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. -TODO(jonathanhuang): wrap as a slim metric in metrics.py -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() -""" - -import copy -import time - -import numpy as np - -from collections import OrderedDict -from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class. - - Attributes: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - """ - - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): - """Construct a COCOWrapper. - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations: list) -> coco.COCO: - """Load annotations dictionary into COCO datastructure. - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. 
- Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. - Returns: - a coco.COCO datastructure holding object detection annotations results - Raises: - ValueError: if (1) annotations is not a list or annotations do not - correspond to the images contained in self. - """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - logger.info("Load and prepare annotation results.") - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): - """Construct a COCOEvalWrapper. - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. - iou_thrs: Minimal value for intersection over union that allows to - make decision that prediction bounding box is true positive. - You can specify one float value between 0 to 1 or - string "05:0.05:0.95" for standard COCO thresholds. - iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. 
- """ - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) - elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] - - if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) - if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) - if map_points == 0: - self.params.recThrs = [-1] - - - def GetCategory(self, category_id: int) -> dict: - """Fetch dictionary holding category information given category id. - Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self) -> bool: - """Return whether COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self) -> List[int]: - """Return the list of IDs of all valid categories.""" - return self.params.catIds - - def accumulate(self, p: cocoeval.Params = None): - """Accumulate evaluation results per image and store it to self.eval. - - Args: - p: input params for evaluation - """ - print('Accumulating evaluation results...') - tic = time.time() - if not self.evalImgs: - print('Please run evaluate() first') - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) - - # create dictionary for future indexing - _pe = self._paramsEval - print('-pe', _pe) - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 - for a, a0 in enumerate(a_list): - Na = a0*I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
- inds = np.argsort(-dtScores, kind='mergesort') - dtScoresSorted = dtScores[inds] - - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) - - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) - - # calculate precision - if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) - else: - q = np.zeros((R,)) - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() - - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - - # calculate recall - if nd: - recall[t,k,a,m] = rc[-1] - else: - recall[t,k,a,m] = 0 - - # calculate score - ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] - except: - pass - scores[t,:,k,a,m] = np.array(ss) - # exit(0) - self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, - } - toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - - - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover - """Compute detection metrics. - Args: - include_metrics_per_category: Whether include metrics per category. - all_metrics_per_category: Whether include all the summery metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. 
- Returns: - A tuple of (summary_metrics, per_category_ap), in which - (1) summary_metrics is a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments; - 'Precision/mAP@.50IOU': mean average precision at 50% IOU; - 'Precision/mAP@.75IOU': mean average precision at 75% IOU; - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels); - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels); - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels); - 'Recall/AR@1': average recall with 1 detection; - 'Recall/AR@10': average recall with 10 detections; - 'Recall/AR@100': average recall with 100 detections; - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections; - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections; - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; - and (2) per_category_ap is a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. If evaluating class-agnostic - mode, per_category_ap is an empty dictionary. - Raises: - ValueError: If category_stats does not exist. - """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - 
per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - Args: - box: a numpy array in format of [ymin, xmin, ymax, xmax] - Returns: - A list of floats, in COCO format, representing [xmin, ymin, width, height] - """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - Returns: - A pycocotools Run-length encoding of the mask. - """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: - """Export groundtruth of a single image to COCO format. - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - Returns: - A list of groundtruth annotations for a single image in the COCO format. 
- Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detections of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to the - ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in - correspondence - that is: boxes[i, :], and classes[i] - are associated with the same groundtruth annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scored for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - Returns: - A list of detection annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. 
- """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) - return detections_list - - -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detection masks of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - Returns: - A list of detection mask annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) - return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py deleted file mode 100644 index 1e88f8abad8..00000000000 --- a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_label_map.py +++ /dev/null @@ -1,103 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# - -"""The dict mapping category IDs to its names of labels.""" - -category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' -} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py deleted file mode 100644 index 2c57fd61302..00000000000 --- a/examples/onnxrt/object_detection/ssd_mobilenet_v1/quantization/ptq_static/coco_tools.py +++ /dev/null @@ -1,672 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Wrappers for third party pycocotools to be used within object_detection. -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. -TODO(jonathanhuang): wrap as a slim metric in metrics.py -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() -""" - -import copy -import time - -import numpy as np - -from collections import OrderedDict -from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class. - - Attributes: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - """ - - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): - """Construct a COCOWrapper. - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations: list) -> coco.COCO: - """Load annotations dictionary into COCO datastructure. - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. 
- Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. - Returns: - a coco.COCO datastructure holding object detection annotations results - Raises: - ValueError: if (1) annotations is not a list or annotations do not - correspond to the images contained in self. - """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - logger.info("Load and prepare annotation results.") - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): - """Construct a COCOEvalWrapper. - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. - iou_thrs: Minimal value for intersection over union that allows to - make decision that prediction bounding box is true positive. - You can specify one float value between 0 to 1 or - string "05:0.05:0.95" for standard COCO thresholds. - iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. 
- """ - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) - elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] - - if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) - if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) - if map_points == 0: - self.params.recThrs = [-1] - - - def GetCategory(self, category_id: int) -> dict: - """Fetch dictionary holding category information given category id. - Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self) -> bool: - """Return whether COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self) -> List[int]: - """Return the list of IDs of all valid categories.""" - return self.params.catIds - - def accumulate(self, p: cocoeval.Params = None): - """Accumulate evaluation results per image and store it to self.eval. - - Args: - p: input params for evaluation - """ - print('Accumulating evaluation results...') - tic = time.time() - if not self.evalImgs: - print('Please run evaluate() first') - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) - - # create dictionary for future indexing - _pe = self._paramsEval - print('-pe', _pe) - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 - for a, a0 in enumerate(a_list): - Na = a0*I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
- inds = np.argsort(-dtScores, kind='mergesort') - dtScoresSorted = dtScores[inds] - - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) - - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) - - # calculate precision - if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) - else: - q = np.zeros((R,)) - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() - - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - - # calculate recall - if nd: - recall[t,k,a,m] = rc[-1] - else: - recall[t,k,a,m] = 0 - - # calculate score - ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] - except: - pass - scores[t,:,k,a,m] = np.array(ss) - # exit(0) - self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, - } - toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - - - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover - """Compute detection metrics. - Args: - include_metrics_per_category: Whether include metrics per category. - all_metrics_per_category: Whether include all the summery metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. 
- Returns: - A tuple of (summary_metrics, per_category_ap), in which - (1) summary_metrics is a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments; - 'Precision/mAP@.50IOU': mean average precision at 50% IOU; - 'Precision/mAP@.75IOU': mean average precision at 75% IOU; - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels); - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels); - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels); - 'Recall/AR@1': average recall with 1 detection; - 'Recall/AR@10': average recall with 10 detections; - 'Recall/AR@100': average recall with 100 detections; - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections; - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections; - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; - and (2) per_category_ap is a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. If evaluating class-agnostic - mode, per_category_ap is an empty dictionary. - Raises: - ValueError: If category_stats does not exist. - """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - 
per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - Args: - box: a numpy array in format of [ymin, xmin, ymax, xmax] - Returns: - A list of floats, in COCO format, representing [xmin, ymin, width, height] - """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - Returns: - A pycocotools Run-length encoding of the mask. - """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: - """Export groundtruth of a single image to COCO format. - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - Returns: - A list of groundtruth annotations for a single image in the COCO format. 
- Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detections of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to the - ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in - correspondence - that is: boxes[i, :], and classes[i] - are associated with the same groundtruth annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scored for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - Returns: - A list of detection annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. 
- """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) - return detections_list - - -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detection masks of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - Returns: - A list of detection mask annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) - return detections_list \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py deleted file mode 100644 index 1e88f8abad8..00000000000 --- a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_label_map.py +++ /dev/null @@ -1,103 +0,0 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# - -"""The dict mapping category IDs to its names of labels.""" - -category_map = { - 1: 'person', - 2: 'bicycle', - 3: 'car', - 4: 'motorcycle', - 5: 'airplane', - 6: 'bus', - 7: 'train', - 8: 'truck', - 9: 'boat', - 10: 'traffic light', - 11: 'fire hydrant', - 13: 'stop sign', - 14: 'parking meter', - 15: 'bench', - 16: 'bird', - 17: 'cat', - 18: 'dog', - 19: 'horse', - 20: 'sheep', - 21: 'cow', - 22: 'elephant', - 23: 'bear', - 24: 'zebra', - 25: 'giraffe', - 27: 'backpack', - 28: 'umbrella', - 31: 'handbag', - 32: 'tie', - 33: 'suitcase', - 34: 'frisbee', - 35: 'skis', - 36: 'snowboard', - 37: 'sports ball', - 38: 'kite', - 39: 'baseball bat', - 40: 'baseball glove', - 41: 'skateboard', - 42: 'surfboard', - 43: 'tennis racket', - 44: 'bottle', - 46: 'wine glass', - 47: 'cup', - 48: 'fork', - 49: 'knife', - 50: 'spoon', - 51: 'bowl', - 52: 'banana', - 53: 'apple', - 54: 'sandwich', - 55: 'orange', - 56: 'broccoli', - 57: 'carrot', - 58: 'hot dog', - 59: 'pizza', - 60: 'donut', - 61: 'cake', - 62: 'chair', - 63: 'couch', - 64: 'potted plant', - 65: 'bed', - 67: 'dining table', - 70: 'toilet', - 72: 'tv', - 73: 'laptop', - 74: 'mouse', - 75: 'remote', - 76: 'keyboard', - 77: 'cell phone', - 78: 'microwave', - 79: 'oven', - 80: 'toaster', - 81: 'sink', - 82: 'refrigerator', - 84: 'book', - 85: 'clock', - 86: 'vase', - 87: 'scissors', - 88: 'teddy bear', - 89: 'hair drier', - 90: 'toothbrush' -} \ No newline at end of file diff --git a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py b/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py deleted file mode 100644 index 2c57fd61302..00000000000 --- a/examples/onnxrt/object_detection/ssd_mobilenet_v2/quantization/ptq_static/coco_tools.py +++ /dev/null @@ -1,672 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (c) 2021 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -"""Wrappers for third party pycocotools to be used within object_detection. -Note that nothing in this file is tensorflow related and thus cannot -be called directly as a slim metric, for example. -TODO(jonathanhuang): wrap as a slim metric in metrics.py -Usage example: given a set of images with ids in the list image_ids -and corresponding lists of numpy arrays encoding groundtruth (boxes and classes) -and detections (boxes, scores and classes), where elements of each list -correspond to detections/annotations of a single image, -then evaluation (in multi-class mode) can be invoked as follows: - groundtruth_dict = coco_tools.ExportGroundtruthToCOCO( - image_ids, groundtruth_boxes_list, groundtruth_classes_list, - max_num_classes, output_path=None) - detections_list = coco_tools.ExportDetectionsToCOCO( - image_ids, detection_boxes_list, detection_scores_list, - detection_classes_list, output_path=None) - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() -""" - -import copy -import time - -import numpy as np - -from collections import OrderedDict -from neural_compressor.utils import logger -from pycocotools import coco -from pycocotools import cocoeval -from pycocotools import mask -from typing import Any, Dict, List, Set, Union - - -class COCOWrapper(coco.COCO): - """Wrapper for the pycocotools COCO class. - - Attributes: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - """ - - def __init__(self, dataset: Dict[str, Any], detection_type: str = 'bbox'): - """Construct a COCOWrapper. - See http://mscoco.org/dataset/#format for a description of the format. - By default, the coco.COCO class constructor reads from a JSON file. - This function duplicates the same behavior but loads from a dictionary, - allowing us to perform evaluation without writing to external storage. - Args: - dataset: a dictionary holding bounding box annotations in the COCO format. - detection_type: type of detections being wrapped. Can be one of ['bbox', - 'segmentation'] - Raises: - ValueError: if detection_type is unsupported. - """ - supported_detection_types = ['bbox', 'segmentation'] - if detection_type not in supported_detection_types: - raise ValueError('Unsupported detection type: {}. ' - 'Supported values are: {}'.format( - detection_type, supported_detection_types)) - self._detection_type = detection_type - coco.COCO.__init__(self) - self.dataset = dataset - self.createIndex() - - def LoadAnnotations(self, annotations: list) -> coco.COCO: - """Load annotations dictionary into COCO datastructure. - See http://mscoco.org/dataset/#format for a description of the annotations - format. As above, this function replicates the default behavior of the API - but does not require writing to external storage. 
- Args: - annotations: python list holding object detection results where each - detection is encoded as a dict with required keys ['image_id', - 'category_id', 'score'] and one of ['bbox', 'segmentation'] based on - `detection_type`. - Returns: - a coco.COCO datastructure holding object detection annotations results - Raises: - ValueError: if (1) annotations is not a list or annotations do not - correspond to the images contained in self. - """ - results = coco.COCO() - results.dataset['images'] = [img for img in self.dataset['images']] - - logger.info("Load and prepare annotation results.") - tic = time.time() - - if not isinstance(annotations, list): - raise ValueError('annotations is not a list of objects') - annotation_img_ids = [ann['image_id'] for ann in annotations] - if (set(annotation_img_ids) != (set(annotation_img_ids) - & set(self.getImgIds()))): - raise ValueError('Results do not correspond to current coco set') - results.dataset['categories'] = copy.deepcopy( - self.dataset['categories']) - if self._detection_type == 'bbox': - for idx, ann in enumerate(annotations): - bb = ann['bbox'] - ann['area'] = bb[2] * bb[3] - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - elif self._detection_type == 'segmentation': - for idx, ann in enumerate(annotations): - ann['area'] = mask.area(ann['segmentation']) - ann['bbox'] = mask.toBbox(ann['segmentation']) - ann['id'] = idx + 1 - ann['iscrowd'] = 0 - logger.info('DONE (t=%0.2fs)', (time.time() - tic)) - - results.dataset['annotations'] = annotations - results.createIndex() - return results - - -class COCOEvalWrapper(cocoeval.COCOeval): - """Wrapper for the pycocotools COCOeval class. - To evaluate, create two objects (groundtruth_dict and detections_list) - using the conventions listed at http://mscoco.org/dataset/#format. - Then call evaluation as follows: - groundtruth = coco_tools.COCOWrapper(groundtruth_dict) - detections = groundtruth.LoadAnnotations(detections_list) - evaluator = coco_tools.COCOEvalWrapper(groundtruth, detections, - agnostic_mode=False) - metrics = evaluator.ComputeMetrics() - """ - - def __init__(self, - groundtruth: coco.COCO = None, - detections: coco.COCO = None, - agnostic_mode = False, - iou_type: str = 'bbox', - iou_thrs: Union[str, float] = None, - map_points=None): - """Construct a COCOEvalWrapper. - Note that for the area-based metrics to be meaningful, detection and - groundtruth boxes must be in image coordinates measured in pixels. - Args: - groundtruth: a coco.COCO (or coco_tools.COCOWrapper) object holding - groundtruth annotations - detections: a coco.COCO (or coco_tools.COCOWrapper) object holding - detections - agnostic_mode: boolean (default: False). If True, evaluation ignores - class labels, treating all detections as proposals. - iou_thrs: Minimal value for intersection over union that allows to - make decision that prediction bounding box is true positive. - You can specify one float value between 0 to 1 or - string "05:0.05:0.95" for standard COCO thresholds. - iou_type: IOU type to use for evaluation. Supports `bbox` or `segm`. - map_points: The way to calculate mAP. 101 for 101-point interpolated AP, 11 for - 11-point interpolated AP, 0 for area under PR curve. 
- """ - cocoeval.COCOeval.__init__(self, - groundtruth, - detections, - iouType=iou_type) - if agnostic_mode: - self.params.useCats = 0 - if iou_thrs == '0.5:0.05:0.95': - self.params.iouThrs = np.linspace(.5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, \ - endpoint=True) - elif isinstance(iou_thrs, float): - self.params.iouThrs = [iou_thrs] - - if map_points == 101: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .01)) + 1, \ - endpoint=True) - if map_points == 11: - self.params.recThrs = np.linspace(.0, 1.00, int(np.round((1.00 - .0) / .1)) + 1, \ - endpoint=True) - if map_points == 0: - self.params.recThrs = [-1] - - - def GetCategory(self, category_id: int) -> dict: - """Fetch dictionary holding category information given category id. - Args: - category_id: integer id - Returns: - dictionary holding 'id', 'name'. - """ - return self.cocoGt.cats[category_id] - - def GetAgnosticMode(self) -> bool: - """Return whether COCO Eval is configured to evaluate in agnostic mode.""" - return self.params.useCats == 0 - - def GetCategoryIdList(self) -> List[int]: - """Return the list of IDs of all valid categories.""" - return self.params.catIds - - def accumulate(self, p: cocoeval.Params = None): - """Accumulate evaluation results per image and store it to self.eval. - - Args: - p: input params for evaluation - """ - print('Accumulating evaluation results...') - tic = time.time() - if not self.evalImgs: - print('Please run evaluate() first') - # allows input customized parameters - if p is None: - p = self.params - p.catIds = p.catIds if p.useCats == 1 else [-1] - T = len(p.iouThrs) - R = len(p.recThrs) - K = len(p.catIds) if p.useCats else 1 - A = len(p.areaRng) - M = len(p.maxDets) - precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories - recall = -np.ones((T,K,A,M)) - scores = -np.ones((T,R,K,A,M)) - - # create dictionary for future indexing - _pe = self._paramsEval - print('-pe', _pe) - catIds = _pe.catIds if _pe.useCats else [-1] - setK = set(catIds) - setA = set(map(tuple, _pe.areaRng)) - setM = set(_pe.maxDets) - setI = set(_pe.imgIds) - # get inds to evaluate - k_list = [n for n, k in enumerate(p.catIds) if k in setK] - m_list = [m for n, m in enumerate(p.maxDets) if m in setM] - a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA] - i_list = [n for n, i in enumerate(p.imgIds) if i in setI] - I0 = len(_pe.imgIds) - A0 = len(_pe.areaRng) - # retrieve E at each category, area range, and max number of detections - for k, k0 in enumerate(k_list): - Nk = k0*A0*I0 - for a, a0 in enumerate(a_list): - Na = a0*I0 - for m, maxDet in enumerate(m_list): - E = [self.evalImgs[Nk + Na + i] for i in i_list] - E = [e for e in E if not e is None] - if len(E) == 0: continue - dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) - - # different sorting method generates slightly different results. - # mergesort is used to be consistent as Matlab implementation. 
- inds = np.argsort(-dtScores, kind='mergesort') - dtScoresSorted = dtScores[inds] - - dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds] - dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds] - gtIg = np.concatenate([e['gtIgnore'] for e in E]) - npig = np.count_nonzero(gtIg==0 ) - if npig == 0: continue - tps = np.logical_and( dtm, np.logical_not(dtIg) ) - fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) ) - - tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float32) - fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float32) - for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)): - tp = np.array(tp) - fp = np.array(fp) - nd = len(tp) - rc = tp / npig - pr = tp / (fp+tp+np.spacing(1)) - - # calculate precision - if R == 1: - rc = np.concatenate(([0.], rc, [1.])) - pr = np.concatenate(([0.], pr, [0.])) - - # compute the precision envelope - for i in range(pr.size - 1, 0, -1): - pr[i - 1] = np.maximum(pr[i - 1], pr[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - change_point = np.where(rc[1:] != rc[:-1])[0] - # and sum (\Delta recall) * recall - res = np.sum((rc[change_point + 1] - rc[change_point]) \ - * pr[change_point + 1]) - precision[t,:,k,a,m] = np.array([res]) - else: - q = np.zeros((R,)) - - # numpy is slow without cython optimization for accessing elements - # use python array gets significant speed improvement - pr = pr.tolist(); q = q.tolist() - - for i in range(nd-1, 0, -1): - if pr[i] > pr[i-1]: - pr[i-1] = pr[i] - - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - q[ri] = pr[pi] - except: - pass - precision[t,:,k,a,m] = np.array(q) - - # calculate recall - if nd: - recall[t,k,a,m] = rc[-1] - else: - recall[t,k,a,m] = 0 - - # calculate score - ss = np.zeros((R,)) - inds = np.searchsorted(rc, p.recThrs, side='left') - try: - for ri, pi in enumerate(inds): - ss[ri] = dtScoresSorted[pi] - except: - pass - scores[t,:,k,a,m] = np.array(ss) - # exit(0) - self.eval = { - 'params': p, - 'counts': [T, R, K, A, M], - 'precision': precision, - 'recall': recall, - 'scores': scores, - } - toc = time.time() - print('DONE (t={:0.2f}s).'.format( toc-tic)) - - - def ComputeMetrics(self, - include_metrics_per_category: bool = False, - all_metrics_per_category: bool = False): # pragma: no cover - """Compute detection metrics. - Args: - include_metrics_per_category: Whether include metrics per category. - all_metrics_per_category: Whether include all the summery metrics for - each category in per_category_ap. Be careful with setting it to true if - you have more than handful of categories, because it will pollute - your mldash. 
- Returns: - A tuple of (summary_metrics, per_category_ap), in which - (1) summary_metrics is a dictionary holding: - 'Precision/mAP': mean average precision over classes averaged over IOU - thresholds ranging from .5 to .95 with .05 increments; - 'Precision/mAP@.50IOU': mean average precision at 50% IOU; - 'Precision/mAP@.75IOU': mean average precision at 75% IOU; - 'Precision/mAP (small)': mean average precision for small objects - (area < 32^2 pixels); - 'Precision/mAP (medium)': mean average precision for medium sized - objects (32^2 pixels < area < 96^2 pixels); - 'Precision/mAP (large)': mean average precision for large objects - (96^2 pixels < area < 10000^2 pixels); - 'Recall/AR@1': average recall with 1 detection; - 'Recall/AR@10': average recall with 10 detections; - 'Recall/AR@100': average recall with 100 detections; - 'Recall/AR@100 (small)': average recall for small objects with 100 - detections; - 'Recall/AR@100 (medium)': average recall for medium objects with 100 - detections; - 'Recall/AR@100 (large)': average recall for large objects with 100 - detections; - and (2) per_category_ap is a dictionary holding category specific results with - keys of the form: 'Precision mAP ByCategory/category' - (without the supercategory part if no supercategories exist). - For backward compatibility 'PerformanceByCategory' is included in the - output regardless of all_metrics_per_category. If evaluating class-agnostic - mode, per_category_ap is an empty dictionary. - Raises: - ValueError: If category_stats does not exist. - """ - self.evaluate() - self.accumulate() - self.summarize() - - summary_metrics = OrderedDict([ - ('Precision/mAP', self.stats[0]), - ('Precision/mAP@.50IOU', self.stats[1]), - ('Precision/mAP@.75IOU', self.stats[2]), - ('Precision/mAP (small)', self.stats[3]), - ('Precision/mAP (medium)', self.stats[4]), - ('Precision/mAP (large)', self.stats[5]), - ('Recall/AR@1', self.stats[6]), ('Recall/AR@10', self.stats[7]), - ('Recall/AR@100', self.stats[8]), - ('Recall/AR@100 (small)', self.stats[9]), - ('Recall/AR@100 (medium)', self.stats[10]), - ('Recall/AR@100 (large)', self.stats[11]) - ]) - if not include_metrics_per_category: - return summary_metrics, {} - if not hasattr(self, 'category_stats'): - raise ValueError('Category stats do not exist') - per_category_ap = OrderedDict([]) - if self.GetAgnosticMode(): - return summary_metrics, per_category_ap - for category_index, category_id in enumerate(self.GetCategoryIdList()): - category = self.GetCategory(category_id)['name'] - # Kept for backward compatilbility - # pylint: disable=no-member - per_category_ap['PerformanceByCategory/mAP/{}'.format( - category)] = self.category_stats[0][category_index] - if all_metrics_per_category: - per_category_ap['Precision mAP ByCategory/{}'.format( - category)] = self.category_stats[0][category_index] - per_category_ap['Precision mAP@.50IOU ByCategory/{}'.format( - category)] = self.category_stats[1][category_index] - per_category_ap['Precision mAP@.75IOU ByCategory/{}'.format( - category)] = self.category_stats[2][category_index] - per_category_ap['Precision mAP (small) ByCategory/{}'.format( - category)] = self.category_stats[3][category_index] - per_category_ap['Precision mAP (medium) ByCategory/{}'.format( - category)] = self.category_stats[4][category_index] - per_category_ap['Precision mAP (large) ByCategory/{}'.format( - category)] = self.category_stats[5][category_index] - per_category_ap['Recall AR@1 ByCategory/{}'.format( - category)] = self.category_stats[6][category_index] - 
per_category_ap['Recall AR@10 ByCategory/{}'.format( - category)] = self.category_stats[7][category_index] - per_category_ap['Recall AR@100 ByCategory/{}'.format( - category)] = self.category_stats[8][category_index] - per_category_ap['Recall AR@100 (small) ByCategory/{}'.format( - category)] = self.category_stats[9][category_index] - per_category_ap['Recall AR@100 (medium) ByCategory/{}'.format( - category)] = self.category_stats[10][category_index] - per_category_ap['Recall AR@100 (large) ByCategory/{}'.format( - category)] = self.category_stats[11][category_index] - - return summary_metrics, per_category_ap - - -def _ConvertBoxToCOCOFormat(box): - """Convert a box in [ymin, xmin, ymax, xmax] format to COCO format. - This is a utility function for converting from our internal - [ymin, xmin, ymax, xmax] convention to the convention used by the COCO API - i.e., [xmin, ymin, width, height]. - Args: - box: a numpy array in format of [ymin, xmin, ymax, xmax] - Returns: - A list of floats, in COCO format, representing [xmin, ymin, width, height] - """ - return [ - float(box[1]), - float(box[0]), - float(box[3] - box[1]), - float(box[2] - box[0]) - ] - - -def _RleCompress(masks): - """Compresses mask using Run-length encoding provided by pycocotools. - Args: - masks: uint8 numpy array of shape [mask_height, mask_width] with values in - {0, 1}. - Returns: - A pycocotools Run-length encoding of the mask. - """ - return mask.encode(np.asfortranarray(masks)) - - -def ExportSingleImageGroundtruthToCoco(image_id: Union[int, str], - next_annotation_id: int, - category_id_set: Set[str], - groundtruth_boxes: np.array, - groundtruth_classes: np.array, - groundtruth_masks: Union[np.array, None] = None, - groundtruth_is_crowd: Union[np.array, None] = None) -> list: - """Export groundtruth of a single image to COCO format. - This function converts groundtruth detection annotations represented as numpy - arrays to dictionaries that can be ingested by the COCO evaluation API. Note - that the image_ids provided here must match the ones given to - ExportSingleImageDetectionsToCoco. We assume that boxes and classes are in - correspondence - that is: groundtruth_boxes[i, :], and - groundtruth_classes[i] are associated with the same groundtruth annotation. - In the exported result, "area" fields are always set to the area of the - groundtruth bounding box. - Args: - image_id: a unique image identifier either of type integer or string. - next_annotation_id: integer specifying the first id to use for the - groundtruth annotations. All annotations are assigned a continuous integer - id starting from this value. - category_id_set: A set of valid class ids. Groundtruth with classes not in - category_id_set are dropped. - groundtruth_boxes: numpy array (float32) with shape [num_gt_boxes, 4] - groundtruth_classes: numpy array (int) with shape [num_gt_boxes] - groundtruth_masks: optional uint8 numpy array of shape [num_detections, - image_height, image_width] containing detection_masks. - groundtruth_is_crowd: optional numpy array (int) with shape [num_gt_boxes] - indicating whether groundtruth boxes are crowd. - Returns: - A list of groundtruth annotations for a single image in the COCO format. 
- Raises: - ValueError: if (1) groundtruth_boxes and groundtruth_classes do not have the - right lengths or (2) if each of the elements inside these lists do not - have the correct shapes or (3) if image_ids are not integers - """ - if len(groundtruth_classes.shape) != 1: - raise ValueError('groundtruth_classes is ' 'expected to be of rank 1.') - if len(groundtruth_boxes.shape) != 2: - raise ValueError('groundtruth_boxes is expected to be of ' 'rank 2.') - if groundtruth_boxes.shape[1] != 4: - raise ValueError('groundtruth_boxes should have ' 'shape[1] == 4.') - num_boxes = groundtruth_classes.shape[0] - if num_boxes != groundtruth_boxes.shape[0]: - raise ValueError( - 'Corresponding entries in groundtruth_classes, ' - 'and groundtruth_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension).' - 'Classes shape: %d. Boxes shape: %d. Image ID: %s' % - (groundtruth_classes.shape[0], groundtruth_boxes.shape[0], - image_id)) - has_is_crowd = groundtruth_is_crowd is not None - if has_is_crowd and len(groundtruth_is_crowd.shape) != 1: - raise ValueError('groundtruth_is_crowd is expected to be of rank 1.') - groundtruth_list = [] - for i in range(num_boxes): - if groundtruth_classes[i] in category_id_set: - iscrowd = groundtruth_is_crowd[i] if has_is_crowd else 0 - export_dict = { - 'id': - next_annotation_id + i, - 'image_id': - image_id, - 'category_id': - int(groundtruth_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(groundtruth_boxes[i, :])), - 'area': - float((groundtruth_boxes[i, 2] - groundtruth_boxes[i, 0]) * - (groundtruth_boxes[i, 3] - groundtruth_boxes[i, 1])), - 'iscrowd': - iscrowd - } - if groundtruth_masks is not None: - export_dict['segmentation'] = _RleCompress( - groundtruth_masks[i]) - groundtruth_list.append(export_dict) - return groundtruth_list - - -def ExportSingleImageDetectionBoxesToCoco(image_id: Union[int, str], - category_id_set: Set[int], - detection_boxes: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detections of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. Note that the image_ids - provided here must match the ones given to the - ExporSingleImageDetectionBoxesToCoco. We assume that boxes, and classes are in - correspondence - that is: boxes[i, :], and classes[i] - are associated with the same groundtruth annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_boxes: float numpy array of shape [num_detections, 4] containing - detection boxes. - detection_scores: float numpy array of shape [num_detections] containing - scored for the detection boxes. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection boxes. - Returns: - A list of detection annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_boxes, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. 
- """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - if len(detection_boxes.shape) != 2: - raise ValueError('All entries in detection_boxes expected to be of ' - 'rank 2.') - if detection_boxes.shape[1] != 4: - raise ValueError('All entries in detection_boxes should have ' - 'shape[1] == 4.') - num_boxes = detection_classes.shape[0] - if not num_boxes == detection_boxes.shape[0] == detection_scores.shape[0]: - raise ValueError( - 'Corresponding entries in detection_classes, ' - 'detection_scores and detection_boxes should have ' - 'compatible shapes (i.e., agree on the 0th dimension). ' - 'Classes shape: %d. Boxes shape: %d. ' - 'Scores shape: %d' % - (detection_classes.shape[0], detection_boxes.shape[0], - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'bbox': - list(_ConvertBoxToCOCOFormat(detection_boxes[i, :])), - 'score': - float(detection_scores[i]) - }) - return detections_list - - -def ExportSingleImageDetectionMasksToCoco(image_id: Union[str, int], - category_id_set: Set[int], - detection_masks: np.array, - detection_scores: np.array, - detection_classes: np.array) -> list: - """Export detection masks of a single image to COCO format. - This function converts detections represented as numpy arrays to dictionaries - that can be ingested by the COCO evaluation API. We assume that - detection_masks, detection_scores, and detection_classes are in correspondence - - that is: detection_masks[i, :], detection_classes[i] and detection_scores[i] - are associated with the same annotation. - Args: - image_id: unique image identifier either of type integer or string. - category_id_set: A set of valid class ids. Detections with classes not in - category_id_set are dropped. - detection_masks: uint8 numpy array of shape [num_detections, image_height, - image_width] containing detection_masks. - detection_scores: float numpy array of shape [num_detections] containing - scores for detection masks. - detection_classes: integer numpy array of shape [num_detections] containing - the classes for detection masks. - Returns: - A list of detection mask annotations for a single image in the COCO format. - Raises: - ValueError: if (1) detection_masks, detection_scores and detection_classes - do not have the right lengths or (2) if each of the elements inside these - lists do not have the correct shapes or (3) if image_ids are not integers. - """ - if len(detection_classes.shape) != 1 or len(detection_scores.shape) != 1: - raise ValueError( - 'All entries in detection_classes and detection_scores' - 'expected to be of rank 1.') - num_boxes = detection_classes.shape[0] - if not num_boxes == len(detection_masks) == detection_scores.shape[0]: - raise ValueError('Corresponding entries in detection_classes, ' - 'detection_scores and detection_masks should have ' - 'compatible lengths and shapes ' - 'Classes length: %d. Masks length: %d. 
' - 'Scores length: %d' % - (detection_classes.shape[0], len(detection_masks), - detection_scores.shape[0])) - detections_list = [] - for i in range(num_boxes): - if detection_classes[i] in category_id_set: - detections_list.append({ - 'image_id': - image_id, - 'category_id': - int(detection_classes[i]), - 'segmentation': - _RleCompress(detection_masks[i]), - 'score': - float(detection_scores[i]) - }) - return detections_list \ No newline at end of file diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/README.md b/examples/pytorch/cv/fp8_quant/README.md similarity index 100% rename from examples/3.x_api/pytorch/cv/fp8_quant/README.md rename to examples/pytorch/cv/fp8_quant/README.md diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/extract_ILSVRC.sh b/examples/pytorch/cv/fp8_quant/extract_ILSVRC.sh similarity index 100% rename from examples/3.x_api/pytorch/cv/fp8_quant/extract_ILSVRC.sh rename to examples/pytorch/cv/fp8_quant/extract_ILSVRC.sh diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/main.py b/examples/pytorch/cv/fp8_quant/main.py similarity index 100% rename from examples/3.x_api/pytorch/cv/fp8_quant/main.py rename to examples/pytorch/cv/fp8_quant/main.py diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/requirements.txt b/examples/pytorch/cv/fp8_quant/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/cv/fp8_quant/requirements.txt rename to examples/pytorch/cv/fp8_quant/requirements.txt diff --git a/examples/3.x_api/pytorch/cv/fp8_quant/run_quant.sh b/examples/pytorch/cv/fp8_quant/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/cv/fp8_quant/run_quant.sh rename to examples/pytorch/cv/fp8_quant/run_quant.sh diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/README.md b/examples/pytorch/cv/mixed_precision/README.md similarity index 100% rename from examples/3.x_api/pytorch/cv/mixed_precision/README.md rename to examples/pytorch/cv/mixed_precision/README.md diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/main.py b/examples/pytorch/cv/mixed_precision/main.py similarity index 100% rename from examples/3.x_api/pytorch/cv/mixed_precision/main.py rename to examples/pytorch/cv/mixed_precision/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/requirements.txt b/examples/pytorch/cv/mixed_precision/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/qat/fx/requirements.txt rename to examples/pytorch/cv/mixed_precision/requirements.txt diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/run_autotune.sh b/examples/pytorch/cv/mixed_precision/run_autotune.sh similarity index 100% rename from examples/3.x_api/pytorch/cv/mixed_precision/run_autotune.sh rename to examples/pytorch/cv/mixed_precision/run_autotune.sh diff --git a/examples/3.x_api/pytorch/cv/mixed_precision/run_benchmark.sh b/examples/pytorch/cv/mixed_precision/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/cv/mixed_precision/run_benchmark.sh rename to examples/pytorch/cv/mixed_precision/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/cv/static_quant/README.md b/examples/pytorch/cv/static_quant/README.md similarity index 100% rename from examples/3.x_api/pytorch/cv/static_quant/README.md rename to examples/pytorch/cv/static_quant/README.md diff --git a/examples/3.x_api/pytorch/cv/static_quant/extract_ILSVRC.sh b/examples/pytorch/cv/static_quant/extract_ILSVRC.sh similarity index 100% rename from 
examples/3.x_api/pytorch/cv/static_quant/extract_ILSVRC.sh rename to examples/pytorch/cv/static_quant/extract_ILSVRC.sh diff --git a/examples/3.x_api/pytorch/cv/static_quant/main.py b/examples/pytorch/cv/static_quant/main.py similarity index 100% rename from examples/3.x_api/pytorch/cv/static_quant/main.py rename to examples/pytorch/cv/static_quant/main.py diff --git a/examples/3.x_api/pytorch/cv/static_quant/requirements.txt b/examples/pytorch/cv/static_quant/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/cv/static_quant/requirements.txt rename to examples/pytorch/cv/static_quant/requirements.txt diff --git a/examples/3.x_api/pytorch/cv/static_quant/run_benchmark.sh b/examples/pytorch/cv/static_quant/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/cv/static_quant/run_benchmark.sh rename to examples/pytorch/cv/static_quant/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/cv/static_quant/run_quant.sh b/examples/pytorch/cv/static_quant/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/cv/static_quant/run_quant.sh rename to examples/pytorch/cv/static_quant/run_quant.sh diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/README.md b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/README.md similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/README.md rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/README.md diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/main.py b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/main.py similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/main.py rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/main.py diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/requirements.txt b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/requirements.txt rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/requirements.txt diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/setup.sh b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/setup.sh similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/setup.sh rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/fp8_quant/setup.sh diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/README.md b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/README.md similarity index 97% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/README.md rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/README.md index 57a40cafcfb..a9175bf98ad 100644 --- a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/README.md +++ b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/README.md @@ -1,83 +1,83 @@ -Step-by-Step -============ -This document describes the step-by-step instructions to run [stable diffusion XL 
model](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) using Smooth Quantization to accelerate inference while maintain the quality of output image. - -# Prerequisite - -## Environment -Recommend python 3.9 or higher version. - -```shell -pip install -r requirements.txt -``` -**Note**: IPEX along with torch require nightly version (2.4) for compatibility. Please refer to [installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=cpu&version=main&os=linux%2fwsl2&package=source). - -# Run - -To quantize the model: -```bash -python sdxl_smooth_quant.py --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 --quantize --alpha 0.44 --output_dir "./saved_results" -``` -or -```bash -sh run_quant.sh --alpha=0.44 -``` -To load a quantized model: -```bash -python sdxl_smooth_quant.py --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 --quantize --load --optimized -``` -or -```bash -sh run_quant.sh --int8=true -``` - -# Results -## Image Generated - -With caption `"A brown and white dog runs on some brown grass near a Frisbee that is just sailing above the ground."`, results of fp32 model and int8 model are listed left and right respectively. - -

- [side-by-side images: images/fp32.jpg (alt "bf16") and images/int8.jpg (alt "int8")]

- -## CLIP evaluation -We have also evaluated CLIP scores on 5000 samples from COCO2014 validation dataset for FP32 model and INT8 model. CLIP results are listed below. - -| Precision | FP32 | INT8 | -|----------------------|-------|-------| -| CLIP on COCO2014 val | 32.05 | 31.77 | - -We're using the mlperf_sd_inference [repo](https://github.com/ahmadki/mlperf_sd_inference) to evaluate CLIP scores. In order to support evaluation on quantized model, -we made some modification on the script (`main.py`). Please use as following: -```bash -git clone https://github.com/ahmadki/mlperf_sd_inference.git -cd mlperf_sd_inference -mv ../main.py ./ -``` -After setting the environment as instructed in the repo, you can execute the modified `main.py` script to generate images: -```bash -python main.py \ - --model-id stabilityai/stable-diffusion-xl-base-1.0 \ - --quantized-unet ./saved_results \ # quantized model saving path, should include `qconfig.json` and `quantized_model.pt` - --precision fp32 \ - --guidance 8.0 \ - --steps 20 \ - --iters 200 \ # change to 5000 for the full 5k dataset - --latent-path latents.pt \ - --base-output-dir ./output -``` -Then you can compute CLIP score using the images generated by the quantized model: -```bash -mv ./output/stabilityai--stable-diffusion-xl-base-1.0__euler__20__8.0__fp32/* ./output/ # switch directory -rm -rf ./output/stabilityai--stable-diffusion-xl-base-1.0__euler__20__8.0__fp32/ - -python clip/clip_score.py \ - --tsv-file captions_5k.tsv \ - --image-folder ./output \ # folder with the generated images - --device "cpu" -``` -Or you can use the bash script for all steps above: -```bash -sh run_benchmark.sh --mode=accuracy --optimized=true -``` +Step-by-Step +============ +This document describes the step-by-step instructions to run [stable diffusion XL model](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) using Smooth Quantization to accelerate inference while maintain the quality of output image. + +# Prerequisite + +## Environment +Recommend python 3.9 or higher version. + +```shell +pip install -r requirements.txt +``` +**Note**: IPEX along with torch require nightly version (2.4) for compatibility. Please refer to [installation](https://intel.github.io/intel-extension-for-pytorch/index.html#installation?platform=cpu&version=main&os=linux%2fwsl2&package=source). + +# Run + +To quantize the model: +```bash +python sdxl_smooth_quant.py --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 --quantize --alpha 0.44 --output_dir "./saved_results" +``` +or +```bash +sh run_quant.sh --alpha=0.44 +``` +To load a quantized model: +```bash +python sdxl_smooth_quant.py --model_name_or_path stabilityai/stable-diffusion-xl-base-1.0 --quantize --load --optimized +``` +or +```bash +sh run_quant.sh --int8=true +``` + +# Results +## Image Generated + +With caption `"A brown and white dog runs on some brown grass near a Frisbee that is just sailing above the ground."`, results of fp32 model and int8 model are listed left and right respectively. + +

+ [side-by-side images: images/fp32.jpg (alt "bf16") and images/int8.jpg (alt "int8")]

+ +## CLIP evaluation +We have also evaluated CLIP scores on 5000 samples from COCO2014 validation dataset for FP32 model and INT8 model. CLIP results are listed below. + +| Precision | FP32 | INT8 | +|----------------------|-------|-------| +| CLIP on COCO2014 val | 32.05 | 31.77 | + +We're using the mlperf_sd_inference [repo](https://github.com/ahmadki/mlperf_sd_inference) to evaluate CLIP scores. In order to support evaluation on quantized model, +we made some modification on the script (`main.py`). Please use as following: +```bash +git clone https://github.com/ahmadki/mlperf_sd_inference.git +cd mlperf_sd_inference +mv ../main.py ./ +``` +After setting the environment as instructed in the repo, you can execute the modified `main.py` script to generate images: +```bash +python main.py \ + --model-id stabilityai/stable-diffusion-xl-base-1.0 \ + --quantized-unet ./saved_results \ # quantized model saving path, should include `qconfig.json` and `quantized_model.pt` + --precision fp32 \ + --guidance 8.0 \ + --steps 20 \ + --iters 200 \ # change to 5000 for the full 5k dataset + --latent-path latents.pt \ + --base-output-dir ./output +``` +Then you can compute CLIP score using the images generated by the quantized model: +```bash +mv ./output/stabilityai--stable-diffusion-xl-base-1.0__euler__20__8.0__fp32/* ./output/ # switch directory +rm -rf ./output/stabilityai--stable-diffusion-xl-base-1.0__euler__20__8.0__fp32/ + +python clip/clip_score.py \ + --tsv-file captions_5k.tsv \ + --image-folder ./output \ # folder with the generated images + --device "cpu" +``` +Or you can use the bash script for all steps above: +```bash +sh run_benchmark.sh --mode=accuracy --optimized=true +``` diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/fp32.jpg b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/fp32.jpg similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/fp32.jpg rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/fp32.jpg diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/int8.jpg b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/int8.jpg similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/int8.jpg rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/images/int8.jpg diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/latents.pt b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/latents.pt similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/latents.pt rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/latents.pt diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/main.py b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/main.py similarity index 97% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/main.py rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/main.py index c4c6632953b..95259635967 100644 --- a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/main.py +++ 
b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/main.py @@ -1,485 +1,485 @@ -import os -import logging -import tempfile -import shutil -import argparse -import pandas as pd -import time -import torch -import intel_extension_for_pytorch as ipex -from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from diffusers import ( - DDPMScheduler, - DDIMScheduler, - EulerDiscreteScheduler, - EulerAncestralDiscreteScheduler, - StableDiffusionXLPipeline, - StableDiffusionXLImg2ImgPipeline, -) -from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import ( - deprecate, retrieve_timesteps, rescale_noise_cfg, - PipelineImageInput, StableDiffusionXLPipelineOutput -) - - -class StableDiffusionXLPipelineSQ(StableDiffusionXLPipeline): - def _get_add_time_ids( - self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None - ): - add_time_ids = list(original_size + crops_coords_top_left + target_size) - add_time_ids = torch.tensor([add_time_ids], dtype=dtype) - return add_time_ids - - def __call__( - self, - prompt: Union[str, List[str]] = None, - prompt_2: Optional[Union[str, List[str]]] = None, - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 50, - timesteps: List[int] = None, - denoising_end: Optional[float] = None, - guidance_scale: float = 5.0, - negative_prompt: Optional[Union[str, List[str]]] = None, - negative_prompt_2: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: Optional[int] = 1, - eta: float = 0.0, - generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, - latents: Optional[torch.FloatTensor] = None, - prompt_embeds: Optional[torch.FloatTensor] = None, - negative_prompt_embeds: Optional[torch.FloatTensor] = None, - pooled_prompt_embeds: Optional[torch.FloatTensor] = None, - negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None, - ip_adapter_image: Optional[PipelineImageInput] = None, - output_type: Optional[str] = "pil", - return_dict: bool = True, - cross_attention_kwargs: Optional[Dict[str, Any]] = None, - guidance_rescale: float = 0.0, - original_size: Optional[Tuple[int, int]] = None, - crops_coords_top_left: Tuple[int, int] = (0, 0), - target_size: Optional[Tuple[int, int]] = None, - negative_original_size: Optional[Tuple[int, int]] = None, - negative_crops_coords_top_left: Tuple[int, int] = (0, 0), - negative_target_size: Optional[Tuple[int, int]] = None, - clip_skip: Optional[int] = None, - callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None, - callback_on_step_end_tensor_inputs: List[str] = ["latents"], - **kwargs, - ): - - callback = kwargs.pop("callback", None) - callback_steps = kwargs.pop("callback_steps", None) - - if callback is not None: - deprecate( - "callback", - "1.0.0", - "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`", - ) - if callback_steps is not None: - deprecate( - "callback_steps", - "1.0.0", - "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`", - ) - - # 0. Default height and width to unet - height = height or self.default_sample_size * self.vae_scale_factor - width = width or self.default_sample_size * self.vae_scale_factor - - original_size = original_size or (height, width) - target_size = target_size or (height, width) - - # 1. Check inputs. 
Raise error if not correct - self.check_inputs( - prompt, - prompt_2, - height, - width, - callback_steps, - negative_prompt, - negative_prompt_2, - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - callback_on_step_end_tensor_inputs, - ) - - self._guidance_scale = guidance_scale - self._guidance_rescale = guidance_rescale - self._clip_skip = clip_skip - self._cross_attention_kwargs = cross_attention_kwargs - self._denoising_end = denoising_end - - # 2. Define call parameters - if prompt is not None and isinstance(prompt, str): - batch_size = 1 - elif prompt is not None and isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - device = 'cpu' - - # 3. Encode input prompt - lora_scale = ( - self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None - ) - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = self.encode_prompt( - prompt=prompt, - prompt_2=prompt_2, - device=device, - num_images_per_prompt=num_images_per_prompt, - do_classifier_free_guidance=self.do_classifier_free_guidance, - negative_prompt=negative_prompt, - negative_prompt_2=negative_prompt_2, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - lora_scale=lora_scale, - clip_skip=self.clip_skip, - ) - - # 4. Prepare timesteps - timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps) - - # 5. Prepare latent variables - num_channels_latents = self.unet.config.in_channels - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - prompt_embeds.dtype, - device, - generator, - latents, - ) - - # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 7. 
Prepare added time ids & embeddings - add_text_embeds = pooled_prompt_embeds - if self.text_encoder_2 is None: - text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1]) - else: - text_encoder_projection_dim = self.text_encoder_2.config.projection_dim - - add_time_ids = self._get_add_time_ids( - original_size, - crops_coords_top_left, - target_size, - dtype=prompt_embeds.dtype, - text_encoder_projection_dim=text_encoder_projection_dim, - ) - if negative_original_size is not None and negative_target_size is not None: - negative_add_time_ids = self._get_add_time_ids( - negative_original_size, - negative_crops_coords_top_left, - negative_target_size, - dtype=prompt_embeds.dtype, - text_encoder_projection_dim=text_encoder_projection_dim, - ) - else: - negative_add_time_ids = add_time_ids - - if self.do_classifier_free_guidance: - prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0) - add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0) - add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0) - - prompt_embeds = prompt_embeds.to(device) - add_text_embeds = add_text_embeds.to(device) - add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1) - - if ip_adapter_image is not None: - image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt) - if self.do_classifier_free_guidance: - image_embeds = torch.cat([negative_image_embeds, image_embeds]) - image_embeds = image_embeds.to(device) - - # 8. Denoising loop - num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0) - - # 8.1 Apply denoising_end - if ( - self.denoising_end is not None - and isinstance(self.denoising_end, float) - and self.denoising_end > 0 - and self.denoising_end < 1 - ): - discrete_timestep_cutoff = int( - round( - self.scheduler.config.num_train_timesteps - - (self.denoising_end * self.scheduler.config.num_train_timesteps) - ) - ) - num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps))) - timesteps = timesteps[:num_inference_steps] - - # 9. 
Optionally get Guidance Scale Embedding - timestep_cond = None - if self.unet.config.time_cond_proj_dim is not None: - guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt) - timestep_cond = self.get_guidance_scale_embedding( - guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim - ).to(device=device, dtype=latents.dtype) - - self._num_timesteps = len(timesteps) - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents - - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids} - if ip_adapter_image is not None: - added_cond_kwargs["image_embeds"] = image_embeds - noise_pred = self.unet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - added_cond_kwargs=added_cond_kwargs, - )['sample'] - - # perform guidance - if self.do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond) - - if self.do_classifier_free_guidance and self.guidance_rescale > 0.0: - # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf - noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] - - if callback_on_step_end is not None: - callback_kwargs = {} - for k in callback_on_step_end_tensor_inputs: - callback_kwargs[k] = locals()[k] - callback_outputs = callback_on_step_end(self, i, t, callback_kwargs) - - latents = callback_outputs.pop("latents", latents) - prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds) - negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds) - add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds) - negative_pooled_prompt_embeds = callback_outputs.pop( - "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds - ) - add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids) - negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids) - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - progress_bar.update() - if callback is not None and i % callback_steps == 0: - step_idx = i // getattr(self.scheduler, "order", 1) - callback(step_idx, t, latents) - - if not output_type == "latent": - # make sure the VAE is in float32 mode, as it overflows in float16 - needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast - - if needs_upcasting: - self.upcast_vae() - latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype) - - image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] - - # cast back to fp16 if needed - if needs_upcasting: - self.vae.to(dtype=torch.float16) - else: - image = latents - - if not output_type == "latent": - # apply watermark if available - if self.watermark is not None: - image = self.watermark.apply_watermark(image) - - image = 
self.image_processor.postprocess(image.detach(), output_type=output_type) - - # Offload all models - self.maybe_free_model_hooks() - - if not return_dict: - return (image,) - - return StableDiffusionXLPipelineOutput(images=image) - - -parser = argparse.ArgumentParser() -parser.add_argument('--model-id', default="stabilityai/stable-diffusion-xl-base-1.0", type=str) -parser.add_argument('--precision', default='fp32', type=str) -parser.add_argument('--base-output-dir', default="./output", type=str) -parser.add_argument('--quantized-unet', default="./saved_results", type=str) -parser.add_argument("--optimized", action="store_true", help="Load quantized model.") -parser.add_argument("--load", action="store_true") -parser.add_argument('--iters', default=5000, type=int, help="Num of image generated.") -parser.add_argument('--output-dir-name', default=None, type=str) -parser.add_argument('--output-dir-name-postfix', default=None, type=str) -parser.add_argument('--captions-fname', default="captions_5k.tsv", type=str) -parser.add_argument('--guidance', default=8.0, type=float) -parser.add_argument('--scheduler', default="euler", type=str) -parser.add_argument('--steps', default=20, type=int) -parser.add_argument('--negative-prompt', default="normal quality, low quality, worst quality, low res, blurry, nsfw, nude", type=str) -parser.add_argument('--latent-path', default="latents.pt", type=str) -parser.add_argument('--generator-seed', default=None, type=int) -parser.add_argument("--refiner", dest='refiner', action="store_true", - help="Whether to add a refiner to the SDXL pipeline." - "Applicable only with --model-id=xl") -parser.add_argument("--no-refiner", dest='refiner', action="store_false", - help="Whether to add a refiner to the SDXL pipeline." - "Applicable only with --model-id=xl") - -args = parser.parse_args() - -# Init the logger -logging.basicConfig( - format='%(asctime)s %(levelname)-8s %(message)s', - level=logging.INFO, - datefmt='%Y-%m-%d %H:%M:%S' -) - -if args.latent_path and args.generator_seed: - raise ValueError( - "Cannot specify both --latent-path and --generator-seed" - ) - -if args.precision == "fp16": - dtype = torch.float16 -elif args.precision == "bf16": - dtype = torch.bfloat16 -else: - dtype = torch.float32 - -# Initialize defaults -device = torch.device('cpu') -world_size = 1 -rank = 0 - -# load frozen latent -latent_noise = None -if args.latent_path: - logging.info(f"[{rank}] loading latent from: {args.latent_path}") - latent_noise = torch.load(args.latent_path).to(dtype) - -logging.info(f"[{rank}] args: {args}") -logging.info(f"[{rank}] world_size: {world_size}") -logging.info(f"[{rank}] device: {device}") - -logging.info(f"[{rank}] using captions from: {args.captions_fname}") -df = pd.read_csv(args.captions_fname, sep='\t') -logging.info(f"[{rank}] {len(df)} captions loaded") - -# split captions among ranks -df = df[rank::world_size] -logging.info(f"[{rank}] {len(df)} captions assigned") - -# Build the pipeline -schedulers = { - "ddpm": DDPMScheduler.from_pretrained(args.model_id, subfolder="scheduler"), - "ddim": DDIMScheduler.from_pretrained(args.model_id, subfolder="scheduler"), - "euler_anc": EulerAncestralDiscreteScheduler.from_pretrained(args.model_id, subfolder="scheduler"), - "euler": EulerDiscreteScheduler.from_pretrained(args.model_id, subfolder="scheduler"), -} -pipe = StableDiffusionXLPipelineSQ.from_pretrained( - "stabilityai/stable-diffusion-xl-base-1.0", - torch_dtype=dtype, - use_safetensors=True, -) -pipe = pipe.to(dtype) # Ensure all modules are set 
as dtype -pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config) - -if args.refiner: - refiner_pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(args.model_id, - scheduler=schedulers[args.scheduler], - safety_checker=None, - add_watermarker=False, - variant="fp16" if args.precision == 'fp16' else None, - torch_dtype=dtype) - -if args.optimized and args.load: - from neural_compressor.torch.quantization import load - example_inputs = {"sample": torch.randn((2, 4, 128, 128), dtype=dtype), - "timestep": torch.tensor(951.0), - "encoder_hidden_states": torch.randn((2, 77, 2048), dtype=dtype), - "added_cond_kwargs": {'text_embeds':torch.randn((2, 1280), dtype=dtype), - 'time_ids': torch.tensor([[1024., 1024., 0., 0., 1024., 1024.], - [1024., 1024., 0., 0., 1024., 1024.]], dtype=dtype)},} - q_unet = load(args.quantized_unet) - for _ in range(2): - q_unet(**example_inputs) - print("Loaded Quantized Model") - setattr(q_unet, "config", pipe.unet.config) - pipe.unet = q_unet - -pipe.set_progress_bar_config(disable=True) -logging.info(f"[{rank}] Pipeline initialized: {pipe}") - -if args.refiner: - refiner_pipe = refiner_pipe.to(device) - refiner_pipe.set_progress_bar_config(disable=True) - logging.info(f"[{rank}] Refiner pipeline initialized: {refiner_pipe}") - -# Output directory -output_dir = args.output_dir_name or f"{args.model_id.replace('/','--')}__{args.scheduler}__{args.steps}__{args.guidance}__{args.precision}" -if args.output_dir_name_postfix is not None: - output_dir = f"{output_dir}_{args.output_dir_name_postfix}" - -output_dir = os.path.join(args.base_output_dir, output_dir) - -# Ensure the output directory exists -if not os.path.exists(output_dir): - os.makedirs(output_dir) - -# Create a temporary directory to atomically move the images -tmp_dir = tempfile.mkdtemp() - -# Generate the images -for index, row in df.iterrows(): - image_id = row['image_id'] - caption_id = row['id'] - caption_text = row['caption'] - - destination_path = os.path.join(output_dir, f"{caption_id}.png") - - if index >= args.iters: - break - - # Check if the image already exists in the output directory - if not os.path.exists(destination_path): - # Generate the image - print(index, caption_text) - tic = time.time() - image = pipe(prompt=caption_text, - negative_prompt="normal quality, low quality, worst quality, low res, blurry, nsfw, nude", - guidance_scale=8.0, - generator=torch.Generator(device=device).manual_seed(args.generator_seed) if args.generator_seed else None, - latents=latent_noise, - num_inference_steps=20).images[0] - toc = time.time() - print("Time taken : ",toc-tic) - - if args.refiner: - image = refiner_pipe(caption_text, - image=image).images[0] - - # Save the image - image_path_tmp = os.path.join(tmp_dir, f"{caption_id}.png") - image.save(image_path_tmp) - shutil.move(image_path_tmp, destination_path) - - logging.info(f"[{rank}] Saved image {caption_id}: {caption_text}") +import os +import logging +import tempfile +import shutil +import argparse +import pandas as pd +import time +import torch +import intel_extension_for_pytorch as ipex +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from diffusers import ( + DDPMScheduler, + DDIMScheduler, + EulerDiscreteScheduler, + EulerAncestralDiscreteScheduler, + StableDiffusionXLPipeline, + StableDiffusionXLImg2ImgPipeline, +) +from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import ( + deprecate, retrieve_timesteps, rescale_noise_cfg, + PipelineImageInput, 
StableDiffusionXLPipelineOutput +) + + +class StableDiffusionXLPipelineSQ(StableDiffusionXLPipeline): + def _get_add_time_ids( + self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None + ): + add_time_ids = list(original_size + crops_coords_top_left + target_size) + add_time_ids = torch.tensor([add_time_ids], dtype=dtype) + return add_time_ids + + def __call__( + self, + prompt: Union[str, List[str]] = None, + prompt_2: Optional[Union[str, List[str]]] = None, + height: Optional[int] = None, + width: Optional[int] = None, + num_inference_steps: int = 50, + timesteps: List[int] = None, + denoising_end: Optional[float] = None, + guidance_scale: float = 5.0, + negative_prompt: Optional[Union[str, List[str]]] = None, + negative_prompt_2: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + latents: Optional[torch.FloatTensor] = None, + prompt_embeds: Optional[torch.FloatTensor] = None, + negative_prompt_embeds: Optional[torch.FloatTensor] = None, + pooled_prompt_embeds: Optional[torch.FloatTensor] = None, + negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None, + ip_adapter_image: Optional[PipelineImageInput] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + cross_attention_kwargs: Optional[Dict[str, Any]] = None, + guidance_rescale: float = 0.0, + original_size: Optional[Tuple[int, int]] = None, + crops_coords_top_left: Tuple[int, int] = (0, 0), + target_size: Optional[Tuple[int, int]] = None, + negative_original_size: Optional[Tuple[int, int]] = None, + negative_crops_coords_top_left: Tuple[int, int] = (0, 0), + negative_target_size: Optional[Tuple[int, int]] = None, + clip_skip: Optional[int] = None, + callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None, + callback_on_step_end_tensor_inputs: List[str] = ["latents"], + **kwargs, + ): + + callback = kwargs.pop("callback", None) + callback_steps = kwargs.pop("callback_steps", None) + + if callback is not None: + deprecate( + "callback", + "1.0.0", + "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`", + ) + if callback_steps is not None: + deprecate( + "callback_steps", + "1.0.0", + "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`", + ) + + # 0. Default height and width to unet + height = height or self.default_sample_size * self.vae_scale_factor + width = width or self.default_sample_size * self.vae_scale_factor + + original_size = original_size or (height, width) + target_size = target_size or (height, width) + + # 1. Check inputs. Raise error if not correct + self.check_inputs( + prompt, + prompt_2, + height, + width, + callback_steps, + negative_prompt, + negative_prompt_2, + prompt_embeds, + negative_prompt_embeds, + pooled_prompt_embeds, + negative_pooled_prompt_embeds, + callback_on_step_end_tensor_inputs, + ) + + self._guidance_scale = guidance_scale + self._guidance_rescale = guidance_rescale + self._clip_skip = clip_skip + self._cross_attention_kwargs = cross_attention_kwargs + self._denoising_end = denoising_end + + # 2. Define call parameters + if prompt is not None and isinstance(prompt, str): + batch_size = 1 + elif prompt is not None and isinstance(prompt, list): + batch_size = len(prompt) + else: + batch_size = prompt_embeds.shape[0] + + device = 'cpu' + + # 3. 
Encode input prompt + lora_scale = ( + self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None + ) + + ( + prompt_embeds, + negative_prompt_embeds, + pooled_prompt_embeds, + negative_pooled_prompt_embeds, + ) = self.encode_prompt( + prompt=prompt, + prompt_2=prompt_2, + device=device, + num_images_per_prompt=num_images_per_prompt, + do_classifier_free_guidance=self.do_classifier_free_guidance, + negative_prompt=negative_prompt, + negative_prompt_2=negative_prompt_2, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=negative_prompt_embeds, + pooled_prompt_embeds=pooled_prompt_embeds, + negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, + lora_scale=lora_scale, + clip_skip=self.clip_skip, + ) + + # 4. Prepare timesteps + timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps) + + # 5. Prepare latent variables + num_channels_latents = self.unet.config.in_channels + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + prompt_embeds.dtype, + device, + generator, + latents, + ) + + # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 7. Prepare added time ids & embeddings + add_text_embeds = pooled_prompt_embeds + if self.text_encoder_2 is None: + text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1]) + else: + text_encoder_projection_dim = self.text_encoder_2.config.projection_dim + + add_time_ids = self._get_add_time_ids( + original_size, + crops_coords_top_left, + target_size, + dtype=prompt_embeds.dtype, + text_encoder_projection_dim=text_encoder_projection_dim, + ) + if negative_original_size is not None and negative_target_size is not None: + negative_add_time_ids = self._get_add_time_ids( + negative_original_size, + negative_crops_coords_top_left, + negative_target_size, + dtype=prompt_embeds.dtype, + text_encoder_projection_dim=text_encoder_projection_dim, + ) + else: + negative_add_time_ids = add_time_ids + + if self.do_classifier_free_guidance: + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0) + add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0) + add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0) + + prompt_embeds = prompt_embeds.to(device) + add_text_embeds = add_text_embeds.to(device) + add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1) + + if ip_adapter_image is not None: + image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt) + if self.do_classifier_free_guidance: + image_embeds = torch.cat([negative_image_embeds, image_embeds]) + image_embeds = image_embeds.to(device) + + # 8. Denoising loop + num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0) + + # 8.1 Apply denoising_end + if ( + self.denoising_end is not None + and isinstance(self.denoising_end, float) + and self.denoising_end > 0 + and self.denoising_end < 1 + ): + discrete_timestep_cutoff = int( + round( + self.scheduler.config.num_train_timesteps + - (self.denoising_end * self.scheduler.config.num_train_timesteps) + ) + ) + num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps))) + timesteps = timesteps[:num_inference_steps] + + # 9. 
Optionally get Guidance Scale Embedding + timestep_cond = None + if self.unet.config.time_cond_proj_dim is not None: + guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt) + timestep_cond = self.get_guidance_scale_embedding( + guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim + ).to(device=device, dtype=latents.dtype) + + self._num_timesteps = len(timesteps) + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents + + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids} + if ip_adapter_image is not None: + added_cond_kwargs["image_embeds"] = image_embeds + noise_pred = self.unet( + latent_model_input, + t, + encoder_hidden_states=prompt_embeds, + added_cond_kwargs=added_cond_kwargs, + )['sample'] + + # perform guidance + if self.do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond) + + if self.do_classifier_free_guidance and self.guidance_rescale > 0.0: + # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf + noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] + + if callback_on_step_end is not None: + callback_kwargs = {} + for k in callback_on_step_end_tensor_inputs: + callback_kwargs[k] = locals()[k] + callback_outputs = callback_on_step_end(self, i, t, callback_kwargs) + + latents = callback_outputs.pop("latents", latents) + prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds) + negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds) + add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds) + negative_pooled_prompt_embeds = callback_outputs.pop( + "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds + ) + add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids) + negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids) + + # call the callback, if provided + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + progress_bar.update() + if callback is not None and i % callback_steps == 0: + step_idx = i // getattr(self.scheduler, "order", 1) + callback(step_idx, t, latents) + + if not output_type == "latent": + # make sure the VAE is in float32 mode, as it overflows in float16 + needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast + + if needs_upcasting: + self.upcast_vae() + latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype) + + image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] + + # cast back to fp16 if needed + if needs_upcasting: + self.vae.to(dtype=torch.float16) + else: + image = latents + + if not output_type == "latent": + # apply watermark if available + if self.watermark is not None: + image = self.watermark.apply_watermark(image) + + image = 
self.image_processor.postprocess(image.detach(), output_type=output_type) + + # Offload all models + self.maybe_free_model_hooks() + + if not return_dict: + return (image,) + + return StableDiffusionXLPipelineOutput(images=image) + + +parser = argparse.ArgumentParser() +parser.add_argument('--model-id', default="stabilityai/stable-diffusion-xl-base-1.0", type=str) +parser.add_argument('--precision', default='fp32', type=str) +parser.add_argument('--base-output-dir', default="./output", type=str) +parser.add_argument('--quantized-unet', default="./saved_results", type=str) +parser.add_argument("--optimized", action="store_true", help="Load quantized model.") +parser.add_argument("--load", action="store_true") +parser.add_argument('--iters', default=5000, type=int, help="Num of image generated.") +parser.add_argument('--output-dir-name', default=None, type=str) +parser.add_argument('--output-dir-name-postfix', default=None, type=str) +parser.add_argument('--captions-fname', default="captions_5k.tsv", type=str) +parser.add_argument('--guidance', default=8.0, type=float) +parser.add_argument('--scheduler', default="euler", type=str) +parser.add_argument('--steps', default=20, type=int) +parser.add_argument('--negative-prompt', default="normal quality, low quality, worst quality, low res, blurry, nsfw, nude", type=str) +parser.add_argument('--latent-path', default="latents.pt", type=str) +parser.add_argument('--generator-seed', default=None, type=int) +parser.add_argument("--refiner", dest='refiner', action="store_true", + help="Whether to add a refiner to the SDXL pipeline." + "Applicable only with --model-id=xl") +parser.add_argument("--no-refiner", dest='refiner', action="store_false", + help="Whether to add a refiner to the SDXL pipeline." + "Applicable only with --model-id=xl") + +args = parser.parse_args() + +# Init the logger +logging.basicConfig( + format='%(asctime)s %(levelname)-8s %(message)s', + level=logging.INFO, + datefmt='%Y-%m-%d %H:%M:%S' +) + +if args.latent_path and args.generator_seed: + raise ValueError( + "Cannot specify both --latent-path and --generator-seed" + ) + +if args.precision == "fp16": + dtype = torch.float16 +elif args.precision == "bf16": + dtype = torch.bfloat16 +else: + dtype = torch.float32 + +# Initialize defaults +device = torch.device('cpu') +world_size = 1 +rank = 0 + +# load frozen latent +latent_noise = None +if args.latent_path: + logging.info(f"[{rank}] loading latent from: {args.latent_path}") + latent_noise = torch.load(args.latent_path).to(dtype) + +logging.info(f"[{rank}] args: {args}") +logging.info(f"[{rank}] world_size: {world_size}") +logging.info(f"[{rank}] device: {device}") + +logging.info(f"[{rank}] using captions from: {args.captions_fname}") +df = pd.read_csv(args.captions_fname, sep='\t') +logging.info(f"[{rank}] {len(df)} captions loaded") + +# split captions among ranks +df = df[rank::world_size] +logging.info(f"[{rank}] {len(df)} captions assigned") + +# Build the pipeline +schedulers = { + "ddpm": DDPMScheduler.from_pretrained(args.model_id, subfolder="scheduler"), + "ddim": DDIMScheduler.from_pretrained(args.model_id, subfolder="scheduler"), + "euler_anc": EulerAncestralDiscreteScheduler.from_pretrained(args.model_id, subfolder="scheduler"), + "euler": EulerDiscreteScheduler.from_pretrained(args.model_id, subfolder="scheduler"), +} +pipe = StableDiffusionXLPipelineSQ.from_pretrained( + "stabilityai/stable-diffusion-xl-base-1.0", + torch_dtype=dtype, + use_safetensors=True, +) +pipe = pipe.to(dtype) # Ensure all modules are set 
as dtype +pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config) + +if args.refiner: + refiner_pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(args.model_id, + scheduler=schedulers[args.scheduler], + safety_checker=None, + add_watermarker=False, + variant="fp16" if args.precision == 'fp16' else None, + torch_dtype=dtype) + +if args.optimized and args.load: + from neural_compressor.torch.quantization import load + example_inputs = {"sample": torch.randn((2, 4, 128, 128), dtype=dtype), + "timestep": torch.tensor(951.0), + "encoder_hidden_states": torch.randn((2, 77, 2048), dtype=dtype), + "added_cond_kwargs": {'text_embeds':torch.randn((2, 1280), dtype=dtype), + 'time_ids': torch.tensor([[1024., 1024., 0., 0., 1024., 1024.], + [1024., 1024., 0., 0., 1024., 1024.]], dtype=dtype)},} + q_unet = load(args.quantized_unet) + for _ in range(2): + q_unet(**example_inputs) + print("Loaded Quantized Model") + setattr(q_unet, "config", pipe.unet.config) + pipe.unet = q_unet + +pipe.set_progress_bar_config(disable=True) +logging.info(f"[{rank}] Pipeline initialized: {pipe}") + +if args.refiner: + refiner_pipe = refiner_pipe.to(device) + refiner_pipe.set_progress_bar_config(disable=True) + logging.info(f"[{rank}] Refiner pipeline initialized: {refiner_pipe}") + +# Output directory +output_dir = args.output_dir_name or f"{args.model_id.replace('/','--')}__{args.scheduler}__{args.steps}__{args.guidance}__{args.precision}" +if args.output_dir_name_postfix is not None: + output_dir = f"{output_dir}_{args.output_dir_name_postfix}" + +output_dir = os.path.join(args.base_output_dir, output_dir) + +# Ensure the output directory exists +if not os.path.exists(output_dir): + os.makedirs(output_dir) + +# Create a temporary directory to atomically move the images +tmp_dir = tempfile.mkdtemp() + +# Generate the images +for index, row in df.iterrows(): + image_id = row['image_id'] + caption_id = row['id'] + caption_text = row['caption'] + + destination_path = os.path.join(output_dir, f"{caption_id}.png") + + if index >= args.iters: + break + + # Check if the image already exists in the output directory + if not os.path.exists(destination_path): + # Generate the image + print(index, caption_text) + tic = time.time() + image = pipe(prompt=caption_text, + negative_prompt="normal quality, low quality, worst quality, low res, blurry, nsfw, nude", + guidance_scale=8.0, + generator=torch.Generator(device=device).manual_seed(args.generator_seed) if args.generator_seed else None, + latents=latent_noise, + num_inference_steps=20).images[0] + toc = time.time() + print("Time taken : ",toc-tic) + + if args.refiner: + image = refiner_pipe(caption_text, + image=image).images[0] + + # Save the image + image_path_tmp = os.path.join(tmp_dir, f"{caption_id}.png") + image.save(image_path_tmp) + shutil.move(image_path_tmp, destination_path) + + logging.info(f"[{rank}] Saved image {caption_id}: {caption_text}") diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/requirements.txt b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/requirements.txt similarity index 93% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/requirements.txt rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/requirements.txt index f1fe1f7e20f..257716396d2 100644 --- a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/requirements.txt +++ 
b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/requirements.txt @@ -1,8 +1,8 @@ -diffusers -accelerate -torch -transformers -tensorboard -intel_extension_for_pytorch -tqdm +diffusers +accelerate +torch +transformers +tensorboard +intel_extension_for_pytorch +tqdm open-clip-torch \ No newline at end of file diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_benchmark.sh b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_benchmark.sh rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_quant.sh b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_quant.sh rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/run_quant.sh diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/sdxl_smooth_quant.py b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/sdxl_smooth_quant.py similarity index 97% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/sdxl_smooth_quant.py rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/sdxl_smooth_quant.py index 613c1e6f3ae..a87851e127c 100644 --- a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/sdxl_smooth_quant.py +++ b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/smooth_quant/sdxl_smooth_quant.py @@ -1,437 +1,437 @@ - -import os -import argparse -import torch -import intel_extension_for_pytorch as ipex -from diffusers import EulerDiscreteScheduler, StableDiffusionXLPipeline -from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import ( - deprecate, retrieve_timesteps, rescale_noise_cfg, - PipelineImageInput, StableDiffusionXLPipelineOutput -) -from typing import Any, Callable, Dict, List, Optional, Tuple, Union - -def prompts2images(pipeline, prompts, **kwargs): - images = pipeline( - prompt=prompts, - num_inference_steps=kwargs["n_steps"], - negative_prompt=[ - "normal quality, low quality, worst quality, low res, blurry, nsfw, nude" - ] - * len(prompts), - latents=kwargs["latent"], - guidance_scale=8.0, # MLPerf requirements - ).images - return images - -def save_images(prompts, images, save_dir, prefix='ref'): - for prompt, image in zip(prompts, images): - image_name = f"{prefix}_{'_'.join(prompt.replace('/', ' ').split(' '))}.jpg" - image.save(os.path.join(save_dir, image_name)) - -def do_calibration(pipeline, calibration_prompts, **kwargs): - for i_th, prompts in enumerate(calibration_prompts): - if i_th >= kwargs["calib_size"]: - return - prompts2images(pipeline, prompts, **kwargs) - -class StableDiffusionXLPipelineSQ(StableDiffusionXLPipeline): - def _get_add_time_ids( - self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None - ): - add_time_ids = list(original_size + crops_coords_top_left + target_size) - add_time_ids = torch.tensor([add_time_ids], dtype=dtype) - return add_time_ids - - def __call__( - self, - prompt: Union[str, List[str]] = None, - prompt_2: 
Optional[Union[str, List[str]]] = None, - height: Optional[int] = None, - width: Optional[int] = None, - num_inference_steps: int = 50, - timesteps: List[int] = None, - denoising_end: Optional[float] = None, - guidance_scale: float = 5.0, - negative_prompt: Optional[Union[str, List[str]]] = None, - negative_prompt_2: Optional[Union[str, List[str]]] = None, - num_images_per_prompt: Optional[int] = 1, - eta: float = 0.0, - generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, - latents: Optional[torch.FloatTensor] = None, - prompt_embeds: Optional[torch.FloatTensor] = None, - negative_prompt_embeds: Optional[torch.FloatTensor] = None, - pooled_prompt_embeds: Optional[torch.FloatTensor] = None, - negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None, - ip_adapter_image: Optional[PipelineImageInput] = None, - output_type: Optional[str] = "pil", - return_dict: bool = True, - cross_attention_kwargs: Optional[Dict[str, Any]] = None, - guidance_rescale: float = 0.0, - original_size: Optional[Tuple[int, int]] = None, - crops_coords_top_left: Tuple[int, int] = (0, 0), - target_size: Optional[Tuple[int, int]] = None, - negative_original_size: Optional[Tuple[int, int]] = None, - negative_crops_coords_top_left: Tuple[int, int] = (0, 0), - negative_target_size: Optional[Tuple[int, int]] = None, - clip_skip: Optional[int] = None, - callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None, - callback_on_step_end_tensor_inputs: List[str] = ["latents"], - **kwargs, - ): - - callback = kwargs.pop("callback", None) - callback_steps = kwargs.pop("callback_steps", None) - - if callback is not None: - deprecate( - "callback", - "1.0.0", - "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`", - ) - if callback_steps is not None: - deprecate( - "callback_steps", - "1.0.0", - "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`", - ) - - # 0. Default height and width to unet - height = height or self.default_sample_size * self.vae_scale_factor - width = width or self.default_sample_size * self.vae_scale_factor - - original_size = original_size or (height, width) - target_size = target_size or (height, width) - - # 1. Check inputs. Raise error if not correct - self.check_inputs( - prompt, - prompt_2, - height, - width, - callback_steps, - negative_prompt, - negative_prompt_2, - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - callback_on_step_end_tensor_inputs, - ) - - self._guidance_scale = guidance_scale - self._guidance_rescale = guidance_rescale - self._clip_skip = clip_skip - self._cross_attention_kwargs = cross_attention_kwargs - self._denoising_end = denoising_end - - # 2. Define call parameters - if prompt is not None and isinstance(prompt, str): - batch_size = 1 - elif prompt is not None and isinstance(prompt, list): - batch_size = len(prompt) - else: - batch_size = prompt_embeds.shape[0] - - device = "cpu" - - # 3. 
Encode input prompt - lora_scale = ( - self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None - ) - - ( - prompt_embeds, - negative_prompt_embeds, - pooled_prompt_embeds, - negative_pooled_prompt_embeds, - ) = self.encode_prompt( - prompt=prompt, - prompt_2=prompt_2, - device=device, - num_images_per_prompt=num_images_per_prompt, - do_classifier_free_guidance=self.do_classifier_free_guidance, - negative_prompt=negative_prompt, - negative_prompt_2=negative_prompt_2, - prompt_embeds=prompt_embeds, - negative_prompt_embeds=negative_prompt_embeds, - pooled_prompt_embeds=pooled_prompt_embeds, - negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, - lora_scale=lora_scale, - clip_skip=self.clip_skip, - ) - - # 4. Prepare timesteps - timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps) - - # 5. Prepare latent variables - num_channels_latents = self.unet.config.in_channels - latents = self.prepare_latents( - batch_size * num_images_per_prompt, - num_channels_latents, - height, - width, - prompt_embeds.dtype, - device, - generator, - latents, - ) - - # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline - extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) - - # 7. Prepare added time ids & embeddings - add_text_embeds = pooled_prompt_embeds - if self.text_encoder_2 is None: - text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1]) - else: - text_encoder_projection_dim = self.text_encoder_2.config.projection_dim - - add_time_ids = self._get_add_time_ids( - original_size, - crops_coords_top_left, - target_size, - dtype=prompt_embeds.dtype, - text_encoder_projection_dim=text_encoder_projection_dim, - ) - if negative_original_size is not None and negative_target_size is not None: - negative_add_time_ids = self._get_add_time_ids( - negative_original_size, - negative_crops_coords_top_left, - negative_target_size, - dtype=prompt_embeds.dtype, - text_encoder_projection_dim=text_encoder_projection_dim, - ) - else: - negative_add_time_ids = add_time_ids - - if self.do_classifier_free_guidance: - prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0) - add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0) - add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0) - - prompt_embeds = prompt_embeds.to(device) - add_text_embeds = add_text_embeds.to(device) - add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1) - - if ip_adapter_image is not None: - image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt) - if self.do_classifier_free_guidance: - image_embeds = torch.cat([negative_image_embeds, image_embeds]) - image_embeds = image_embeds.to(device) - - # 8. Denoising loop - num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0) - - # 8.1 Apply denoising_end - if ( - self.denoising_end is not None - and isinstance(self.denoising_end, float) - and self.denoising_end > 0 - and self.denoising_end < 1 - ): - discrete_timestep_cutoff = int( - round( - self.scheduler.config.num_train_timesteps - - (self.denoising_end * self.scheduler.config.num_train_timesteps) - ) - ) - num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps))) - timesteps = timesteps[:num_inference_steps] - - # 9. 
Optionally get Guidance Scale Embedding - timestep_cond = None - if self.unet.config.time_cond_proj_dim is not None: - guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt) - timestep_cond = self.get_guidance_scale_embedding( - guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim - ).to(device=device, dtype=latents.dtype) - - self._num_timesteps = len(timesteps) - with self.progress_bar(total=num_inference_steps) as progress_bar: - for i, t in enumerate(timesteps): - # expand the latents if we are doing classifier free guidance - latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents - - latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) - - # predict the noise residual - added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids} - if ip_adapter_image is not None: - added_cond_kwargs["image_embeds"] = image_embeds - noise_pred = self.unet( - latent_model_input, - t, - encoder_hidden_states=prompt_embeds, - added_cond_kwargs=added_cond_kwargs, - )['sample'] - - # perform guidance - if self.do_classifier_free_guidance: - noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) - noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond) - - if self.do_classifier_free_guidance and self.guidance_rescale > 0.0: - # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf - noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale) - - # compute the previous noisy sample x_t -> x_t-1 - latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] - - if callback_on_step_end is not None: - callback_kwargs = {} - for k in callback_on_step_end_tensor_inputs: - callback_kwargs[k] = locals()[k] - callback_outputs = callback_on_step_end(self, i, t, callback_kwargs) - - latents = callback_outputs.pop("latents", latents) - prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds) - negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds) - add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds) - negative_pooled_prompt_embeds = callback_outputs.pop( - "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds - ) - add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids) - negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids) - - # call the callback, if provided - if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): - progress_bar.update() - if callback is not None and i % callback_steps == 0: - step_idx = i // getattr(self.scheduler, "order", 1) - callback(step_idx, t, latents) - - if not output_type == "latent": - # make sure the VAE is in float32 mode, as it overflows in float16 - needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast - - if needs_upcasting: - self.upcast_vae() - latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype) - - image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] - - # cast back to fp16 if needed - if needs_upcasting: - self.vae.to(dtype=torch.float16) - else: - image = latents - - if not output_type == "latent": - # apply watermark if available - if self.watermark is not None: - image = self.watermark.apply_watermark(image) - - image = 
self.image_processor.postprocess(image.detach(), output_type=output_type) - - # Offload all models - self.maybe_free_model_hooks() - - if not return_dict: - return (image,) - - return StableDiffusionXLPipelineOutput(images=image) - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--model_name_or_path", type=str, default="stabilityai/stable-diffusion-xl-base-1.0" - ) - parser.add_argument("--quantize", action="store_true") - parser.add_argument("--load", action="store_true") - parser.add_argument("--optimized", action="store_true", help="Load quantized model.") - parser.add_argument("--performance", action="store_true") - parser.add_argument("--n_steps", type=int, default=20) - parser.add_argument("--batch-size", type=int, default=1) - parser.add_argument("--calib-size", type=int, default=10) - parser.add_argument("--latent", type=str, default="latents.pt") - parser.add_argument("--alpha", type=float, default=0.5, help="SmoothQuant Alpha") - parser.add_argument("--output_dir", type=str, default="./saved_results", help="output directory") - parser.add_argument("--iters", default=10, type=int, help="For performance measurement only.") - - args = parser.parse_args() - os.makedirs(args.output_dir, exist_ok=True) - - args.calib_size = args.calib_size // args.batch_size - - dtype = torch.float32 - - pipeline = StableDiffusionXLPipelineSQ.from_pretrained( - args.model_name_or_path, - torch_dtype=dtype, - use_safetensors=True, - ) - pipeline = pipeline.to(dtype) # Ensure all modules are set as dtype - pipeline.scheduler = EulerDiscreteScheduler.from_config(pipeline.scheduler.config) - - # This is a list of prompts - cali_prompts = [['A brown and white dog runs on some brown grass near a Frisbee that is just sailing above the ground.'], - ['The bus is traveling down a two way street.']] - - torch.random.manual_seed(42) - if args.latent is not None: - init_latent = torch.load(args.latent).to(dtype) - else: - init_latent = torch.randn((1,4,128,128), dtype=dtype) - - prompts = cali_prompts[0] - ref_images = prompts2images(pipeline, prompts, n_steps=args.n_steps, latent=init_latent) - save_images(prompts, ref_images, args.output_dir, prefix='ref') - - def forward_loop(model): - do_calibration( - pipeline=pipeline, - calibration_prompts=cali_prompts, - calib_size=args.calib_size, - n_steps=args.n_steps, - latent=init_latent, - ) - - if args.quantize: - excluded_precisions = ["bf16"] - example_inputs = {"sample": torch.randn((2, 4, 128, 128), dtype=dtype), - "timestep": torch.tensor(951.0), - "encoder_hidden_states": torch.randn((2, 77, 2048), dtype=dtype), - "added_cond_kwargs": {'text_embeds':torch.randn((2, 1280), dtype=dtype), - 'time_ids': torch.tensor([[1024., 1024., 0., 0., 1024., 1024.], - [1024., 1024., 0., 0., 1024., 1024.]], dtype=dtype)},} - - from neural_compressor.torch.quantization import SmoothQuantConfig, prepare, convert - quant_config = SmoothQuantConfig(alpha=args.alpha, excluded_precisions=excluded_precisions) - user_model = prepare(model=pipeline.unet, quant_config=quant_config, example_inputs=example_inputs) - forward_loop(user_model) - q_unet = convert(user_model) - q_unet.save(args.output_dir) - - if args.load: - if args.optimized: - from neural_compressor.torch.quantization import load - q_unet = load(os.path.abspath(os.path.expanduser(args.output_dir))) - else: - q_unet = pipeline.unet - if not hasattr(q_unet, "config"): - setattr(q_unet, "config", pipeline.unet.config) - pipeline.unet = q_unet - quant_images = prompts2images(pipeline, prompts, 
n_steps=args.n_steps, latent=init_latent) - save_images(prompts, quant_images, args.output_dir, prefix='quant') - - if args.performance: - import time - - total_iters = args.iters * args.batch_size - warmup_iters = 5 - for i in range(total_iters): - if i == warmup_iters: - start = time.time() - prompts2images(pipeline, prompts, n_steps=args.n_steps, latent=init_latent) - end = time.time() - - latency = (end - start) / ((total_iters - warmup_iters) * args.batch_size) - throughput = ((total_iters - warmup_iters) * args.batch_size) / (end - start) - print("Latency: {:.3f} ms".format(latency * 10**3)) - print("Throughput: {:.3f} samples/sec".format(throughput)) - print('Batch size = %d' % args.batch_size) - - -if __name__ == "__main__": - main() + +import os +import argparse +import torch +import intel_extension_for_pytorch as ipex +from diffusers import EulerDiscreteScheduler, StableDiffusionXLPipeline +from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import ( + deprecate, retrieve_timesteps, rescale_noise_cfg, + PipelineImageInput, StableDiffusionXLPipelineOutput +) +from typing import Any, Callable, Dict, List, Optional, Tuple, Union + +def prompts2images(pipeline, prompts, **kwargs): + images = pipeline( + prompt=prompts, + num_inference_steps=kwargs["n_steps"], + negative_prompt=[ + "normal quality, low quality, worst quality, low res, blurry, nsfw, nude" + ] + * len(prompts), + latents=kwargs["latent"], + guidance_scale=8.0, # MLPerf requirements + ).images + return images + +def save_images(prompts, images, save_dir, prefix='ref'): + for prompt, image in zip(prompts, images): + image_name = f"{prefix}_{'_'.join(prompt.replace('/', ' ').split(' '))}.jpg" + image.save(os.path.join(save_dir, image_name)) + +def do_calibration(pipeline, calibration_prompts, **kwargs): + for i_th, prompts in enumerate(calibration_prompts): + if i_th >= kwargs["calib_size"]: + return + prompts2images(pipeline, prompts, **kwargs) + +class StableDiffusionXLPipelineSQ(StableDiffusionXLPipeline): + def _get_add_time_ids( + self, original_size, crops_coords_top_left, target_size, dtype, text_encoder_projection_dim=None + ): + add_time_ids = list(original_size + crops_coords_top_left + target_size) + add_time_ids = torch.tensor([add_time_ids], dtype=dtype) + return add_time_ids + + def __call__( + self, + prompt: Union[str, List[str]] = None, + prompt_2: Optional[Union[str, List[str]]] = None, + height: Optional[int] = None, + width: Optional[int] = None, + num_inference_steps: int = 50, + timesteps: List[int] = None, + denoising_end: Optional[float] = None, + guidance_scale: float = 5.0, + negative_prompt: Optional[Union[str, List[str]]] = None, + negative_prompt_2: Optional[Union[str, List[str]]] = None, + num_images_per_prompt: Optional[int] = 1, + eta: float = 0.0, + generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, + latents: Optional[torch.FloatTensor] = None, + prompt_embeds: Optional[torch.FloatTensor] = None, + negative_prompt_embeds: Optional[torch.FloatTensor] = None, + pooled_prompt_embeds: Optional[torch.FloatTensor] = None, + negative_pooled_prompt_embeds: Optional[torch.FloatTensor] = None, + ip_adapter_image: Optional[PipelineImageInput] = None, + output_type: Optional[str] = "pil", + return_dict: bool = True, + cross_attention_kwargs: Optional[Dict[str, Any]] = None, + guidance_rescale: float = 0.0, + original_size: Optional[Tuple[int, int]] = None, + crops_coords_top_left: Tuple[int, int] = (0, 0), + target_size: Optional[Tuple[int, int]] = None, + 
negative_original_size: Optional[Tuple[int, int]] = None, + negative_crops_coords_top_left: Tuple[int, int] = (0, 0), + negative_target_size: Optional[Tuple[int, int]] = None, + clip_skip: Optional[int] = None, + callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None, + callback_on_step_end_tensor_inputs: List[str] = ["latents"], + **kwargs, + ): + + callback = kwargs.pop("callback", None) + callback_steps = kwargs.pop("callback_steps", None) + + if callback is not None: + deprecate( + "callback", + "1.0.0", + "Passing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`", + ) + if callback_steps is not None: + deprecate( + "callback_steps", + "1.0.0", + "Passing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`", + ) + + # 0. Default height and width to unet + height = height or self.default_sample_size * self.vae_scale_factor + width = width or self.default_sample_size * self.vae_scale_factor + + original_size = original_size or (height, width) + target_size = target_size or (height, width) + + # 1. Check inputs. Raise error if not correct + self.check_inputs( + prompt, + prompt_2, + height, + width, + callback_steps, + negative_prompt, + negative_prompt_2, + prompt_embeds, + negative_prompt_embeds, + pooled_prompt_embeds, + negative_pooled_prompt_embeds, + callback_on_step_end_tensor_inputs, + ) + + self._guidance_scale = guidance_scale + self._guidance_rescale = guidance_rescale + self._clip_skip = clip_skip + self._cross_attention_kwargs = cross_attention_kwargs + self._denoising_end = denoising_end + + # 2. Define call parameters + if prompt is not None and isinstance(prompt, str): + batch_size = 1 + elif prompt is not None and isinstance(prompt, list): + batch_size = len(prompt) + else: + batch_size = prompt_embeds.shape[0] + + device = "cpu" + + # 3. Encode input prompt + lora_scale = ( + self.cross_attention_kwargs.get("scale", None) if self.cross_attention_kwargs is not None else None + ) + + ( + prompt_embeds, + negative_prompt_embeds, + pooled_prompt_embeds, + negative_pooled_prompt_embeds, + ) = self.encode_prompt( + prompt=prompt, + prompt_2=prompt_2, + device=device, + num_images_per_prompt=num_images_per_prompt, + do_classifier_free_guidance=self.do_classifier_free_guidance, + negative_prompt=negative_prompt, + negative_prompt_2=negative_prompt_2, + prompt_embeds=prompt_embeds, + negative_prompt_embeds=negative_prompt_embeds, + pooled_prompt_embeds=pooled_prompt_embeds, + negative_pooled_prompt_embeds=negative_pooled_prompt_embeds, + lora_scale=lora_scale, + clip_skip=self.clip_skip, + ) + + # 4. Prepare timesteps + timesteps, num_inference_steps = retrieve_timesteps(self.scheduler, num_inference_steps, device, timesteps) + + # 5. Prepare latent variables + num_channels_latents = self.unet.config.in_channels + latents = self.prepare_latents( + batch_size * num_images_per_prompt, + num_channels_latents, + height, + width, + prompt_embeds.dtype, + device, + generator, + latents, + ) + + # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline + extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta) + + # 7. 
Prepare added time ids & embeddings + add_text_embeds = pooled_prompt_embeds + if self.text_encoder_2 is None: + text_encoder_projection_dim = int(pooled_prompt_embeds.shape[-1]) + else: + text_encoder_projection_dim = self.text_encoder_2.config.projection_dim + + add_time_ids = self._get_add_time_ids( + original_size, + crops_coords_top_left, + target_size, + dtype=prompt_embeds.dtype, + text_encoder_projection_dim=text_encoder_projection_dim, + ) + if negative_original_size is not None and negative_target_size is not None: + negative_add_time_ids = self._get_add_time_ids( + negative_original_size, + negative_crops_coords_top_left, + negative_target_size, + dtype=prompt_embeds.dtype, + text_encoder_projection_dim=text_encoder_projection_dim, + ) + else: + negative_add_time_ids = add_time_ids + + if self.do_classifier_free_guidance: + prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0) + add_text_embeds = torch.cat([negative_pooled_prompt_embeds, add_text_embeds], dim=0) + add_time_ids = torch.cat([negative_add_time_ids, add_time_ids], dim=0) + + prompt_embeds = prompt_embeds.to(device) + add_text_embeds = add_text_embeds.to(device) + add_time_ids = add_time_ids.to(device).repeat(batch_size * num_images_per_prompt, 1) + + if ip_adapter_image is not None: + image_embeds, negative_image_embeds = self.encode_image(ip_adapter_image, device, num_images_per_prompt) + if self.do_classifier_free_guidance: + image_embeds = torch.cat([negative_image_embeds, image_embeds]) + image_embeds = image_embeds.to(device) + + # 8. Denoising loop + num_warmup_steps = max(len(timesteps) - num_inference_steps * self.scheduler.order, 0) + + # 8.1 Apply denoising_end + if ( + self.denoising_end is not None + and isinstance(self.denoising_end, float) + and self.denoising_end > 0 + and self.denoising_end < 1 + ): + discrete_timestep_cutoff = int( + round( + self.scheduler.config.num_train_timesteps + - (self.denoising_end * self.scheduler.config.num_train_timesteps) + ) + ) + num_inference_steps = len(list(filter(lambda ts: ts >= discrete_timestep_cutoff, timesteps))) + timesteps = timesteps[:num_inference_steps] + + # 9. 
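The guidance handling in this `__call__` is the standard SDXL recipe: when classifier-free guidance is enabled, the negative and positive prompt embeddings (and the added `text_embeds`/`time_ids` conditions) are concatenated along the batch dimension, the latents are duplicated to match, and each UNet output is split back into an unconditional and a text-conditioned half before being recombined with `guidance_scale`. A small, self-contained sketch of just that arithmetic, with random stand-in tensors:

```python
# Stand-alone sketch of the classifier-free-guidance arithmetic used in the loop above.
# All tensors are random placeholders; in the pipeline, noise_pred comes from the UNet.
import torch

guidance_scale = 8.0                      # value prompts2images passes for MLPerf parity
latents = torch.randn(1, 4, 128, 128)     # one prompt at 1024x1024
prompt_embeds = torch.randn(2, 77, 2048)  # row 0: negative prompt, row 1: positive (unused here;
                                          # shown only to illustrate the batching convention)

latent_model_input = torch.cat([latents] * 2)        # duplicate latents for both branches
noise_pred = torch.randn_like(latent_model_input)    # placeholder for unet(...)["sample"]

noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
guided = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
assert guided.shape == latents.shape
```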
Optionally get Guidance Scale Embedding + timestep_cond = None + if self.unet.config.time_cond_proj_dim is not None: + guidance_scale_tensor = torch.tensor(self.guidance_scale - 1).repeat(batch_size * num_images_per_prompt) + timestep_cond = self.get_guidance_scale_embedding( + guidance_scale_tensor, embedding_dim=self.unet.config.time_cond_proj_dim + ).to(device=device, dtype=latents.dtype) + + self._num_timesteps = len(timesteps) + with self.progress_bar(total=num_inference_steps) as progress_bar: + for i, t in enumerate(timesteps): + # expand the latents if we are doing classifier free guidance + latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents + + latent_model_input = self.scheduler.scale_model_input(latent_model_input, t) + + # predict the noise residual + added_cond_kwargs = {"text_embeds": add_text_embeds, "time_ids": add_time_ids} + if ip_adapter_image is not None: + added_cond_kwargs["image_embeds"] = image_embeds + noise_pred = self.unet( + latent_model_input, + t, + encoder_hidden_states=prompt_embeds, + added_cond_kwargs=added_cond_kwargs, + )['sample'] + + # perform guidance + if self.do_classifier_free_guidance: + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_text - noise_pred_uncond) + + if self.do_classifier_free_guidance and self.guidance_rescale > 0.0: + # Based on 3.4. in https://arxiv.org/pdf/2305.08891.pdf + noise_pred = rescale_noise_cfg(noise_pred, noise_pred_text, guidance_rescale=self.guidance_rescale) + + # compute the previous noisy sample x_t -> x_t-1 + latents = self.scheduler.step(noise_pred, t, latents, **extra_step_kwargs, return_dict=False)[0] + + if callback_on_step_end is not None: + callback_kwargs = {} + for k in callback_on_step_end_tensor_inputs: + callback_kwargs[k] = locals()[k] + callback_outputs = callback_on_step_end(self, i, t, callback_kwargs) + + latents = callback_outputs.pop("latents", latents) + prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds) + negative_prompt_embeds = callback_outputs.pop("negative_prompt_embeds", negative_prompt_embeds) + add_text_embeds = callback_outputs.pop("add_text_embeds", add_text_embeds) + negative_pooled_prompt_embeds = callback_outputs.pop( + "negative_pooled_prompt_embeds", negative_pooled_prompt_embeds + ) + add_time_ids = callback_outputs.pop("add_time_ids", add_time_ids) + negative_add_time_ids = callback_outputs.pop("negative_add_time_ids", negative_add_time_ids) + + # call the callback, if provided + if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0): + progress_bar.update() + if callback is not None and i % callback_steps == 0: + step_idx = i // getattr(self.scheduler, "order", 1) + callback(step_idx, t, latents) + + if not output_type == "latent": + # make sure the VAE is in float32 mode, as it overflows in float16 + needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast + + if needs_upcasting: + self.upcast_vae() + latents = latents.to(next(iter(self.vae.post_quant_conv.parameters())).dtype) + + image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0] + + # cast back to fp16 if needed + if needs_upcasting: + self.vae.to(dtype=torch.float16) + else: + image = latents + + if not output_type == "latent": + # apply watermark if available + if self.watermark is not None: + image = self.watermark.apply_watermark(image) + + image = 
self.image_processor.postprocess(image.detach(), output_type=output_type) + + # Offload all models + self.maybe_free_model_hooks() + + if not return_dict: + return (image,) + + return StableDiffusionXLPipelineOutput(images=image) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "--model_name_or_path", type=str, default="stabilityai/stable-diffusion-xl-base-1.0" + ) + parser.add_argument("--quantize", action="store_true") + parser.add_argument("--load", action="store_true") + parser.add_argument("--optimized", action="store_true", help="Load quantized model.") + parser.add_argument("--performance", action="store_true") + parser.add_argument("--n_steps", type=int, default=20) + parser.add_argument("--batch-size", type=int, default=1) + parser.add_argument("--calib-size", type=int, default=10) + parser.add_argument("--latent", type=str, default="latents.pt") + parser.add_argument("--alpha", type=float, default=0.5, help="SmoothQuant Alpha") + parser.add_argument("--output_dir", type=str, default="./saved_results", help="output directory") + parser.add_argument("--iters", default=10, type=int, help="For performance measurement only.") + + args = parser.parse_args() + os.makedirs(args.output_dir, exist_ok=True) + + args.calib_size = args.calib_size // args.batch_size + + dtype = torch.float32 + + pipeline = StableDiffusionXLPipelineSQ.from_pretrained( + args.model_name_or_path, + torch_dtype=dtype, + use_safetensors=True, + ) + pipeline = pipeline.to(dtype) # Ensure all modules are set as dtype + pipeline.scheduler = EulerDiscreteScheduler.from_config(pipeline.scheduler.config) + + # This is a list of prompts + cali_prompts = [['A brown and white dog runs on some brown grass near a Frisbee that is just sailing above the ground.'], + ['The bus is traveling down a two way street.']] + + torch.random.manual_seed(42) + if args.latent is not None: + init_latent = torch.load(args.latent).to(dtype) + else: + init_latent = torch.randn((1,4,128,128), dtype=dtype) + + prompts = cali_prompts[0] + ref_images = prompts2images(pipeline, prompts, n_steps=args.n_steps, latent=init_latent) + save_images(prompts, ref_images, args.output_dir, prefix='ref') + + def forward_loop(model): + do_calibration( + pipeline=pipeline, + calibration_prompts=cali_prompts, + calib_size=args.calib_size, + n_steps=args.n_steps, + latent=init_latent, + ) + + if args.quantize: + excluded_precisions = ["bf16"] + example_inputs = {"sample": torch.randn((2, 4, 128, 128), dtype=dtype), + "timestep": torch.tensor(951.0), + "encoder_hidden_states": torch.randn((2, 77, 2048), dtype=dtype), + "added_cond_kwargs": {'text_embeds':torch.randn((2, 1280), dtype=dtype), + 'time_ids': torch.tensor([[1024., 1024., 0., 0., 1024., 1024.], + [1024., 1024., 0., 0., 1024., 1024.]], dtype=dtype)},} + + from neural_compressor.torch.quantization import SmoothQuantConfig, prepare, convert + quant_config = SmoothQuantConfig(alpha=args.alpha, excluded_precisions=excluded_precisions) + user_model = prepare(model=pipeline.unet, quant_config=quant_config, example_inputs=example_inputs) + forward_loop(user_model) + q_unet = convert(user_model) + q_unet.save(args.output_dir) + + if args.load: + if args.optimized: + from neural_compressor.torch.quantization import load + q_unet = load(os.path.abspath(os.path.expanduser(args.output_dir))) + else: + q_unet = pipeline.unet + if not hasattr(q_unet, "config"): + setattr(q_unet, "config", pipeline.unet.config) + pipeline.unet = q_unet + quant_images = prompts2images(pipeline, prompts, 
n_steps=args.n_steps, latent=init_latent) + save_images(prompts, quant_images, args.output_dir, prefix='quant') + + if args.performance: + import time + + total_iters = args.iters * args.batch_size + warmup_iters = 5 + for i in range(total_iters): + if i == warmup_iters: + start = time.time() + prompts2images(pipeline, prompts, n_steps=args.n_steps, latent=init_latent) + end = time.time() + + latency = (end - start) / ((total_iters - warmup_iters) * args.batch_size) + throughput = ((total_iters - warmup_iters) * args.batch_size) / (end - start) + print("Latency: {:.3f} ms".format(latency * 10**3)) + print("Throughput: {:.3f} samples/sec".format(throughput)) + print('Batch size = %d' % args.batch_size) + + +if __name__ == "__main__": + main() diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/README.md b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/README.md similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/README.md rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/README.md diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/download_dataset.sh b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/download_dataset.sh similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/download_dataset.sh rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/download_dataset.sh diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/main.py b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/main.py similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/main.py rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/main.py diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/requirements.txt b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/requirements.txt rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/requirements.txt diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/run_benchmark.sh b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/run_benchmark.sh rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/run_quant.sh b/examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/run_quant.sh rename to examples/pytorch/diffusion_model/diffusers/stable_diffusion/static_quant/run_quant.sh diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/README.md b/examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/README.md similarity index 100% rename from 
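Boiled down, the `--quantize` branch of `sdxl_smooth_quant.py` above follows the 3.x SmoothQuant flow: wrap the UNet with `prepare`, run the calibration prompts through the full pipeline so the prepared module observes real activations, then `convert` and `save`. A toy sketch of the same prepare → calibrate → convert → save sequence on a small linear model (the module, alpha, and calibration data are placeholders, not the SDXL UNet):

```python
# Toy SmoothQuant flow mirroring the --quantize branch above (placeholder model/data).
# Requires intel_extension_for_pytorch, as in the example's requirements.txt.
import torch
from neural_compressor.torch.quantization import SmoothQuantConfig, convert, prepare

model = torch.nn.Sequential(torch.nn.Linear(64, 64), torch.nn.ReLU(), torch.nn.Linear(64, 8))
example_inputs = (torch.randn(4, 64),)   # positional tuple here; the SDXL script passes a kwargs dict

quant_config = SmoothQuantConfig(alpha=0.5, excluded_precisions=["bf16"])
prepared = prepare(model=model, quant_config=quant_config, example_inputs=example_inputs)

for _ in range(10):                      # calibration: feed representative activations
    prepared(torch.randn(4, 64))

q_model = convert(prepared)
q_model.save("./saved_results")          # reload later with neural_compressor.torch.quantization.load(...)
```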
examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/README.md rename to examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/README.md diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/main.py b/examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/main.py similarity index 100% rename from examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/main.py rename to examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/main.py diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/requirements.txt b/examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/requirements.txt similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/requirements.txt rename to examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/requirements.txt diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_benchmark.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_benchmark.sh rename to examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_benchmark.sh diff --git a/examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/run_quant.sh b/examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_quant.sh similarity index 100% rename from examples/pytorch/image_recognition/torchvision_models/quantization/ptq/cpu/ipex/run_quant.sh rename to examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/run_quant.sh diff --git a/examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/utils.py b/examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/utils.py similarity index 100% rename from examples/3.x_api/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/utils.py rename to examples/pytorch/image_recognition/torchvision_models/quantization/static_quant/ipex/utils.py diff --git a/examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/__init__.py b/examples/pytorch/multimodal-modeling/quantization/auto_round/__init__.py similarity index 100% rename from examples/pytorch/object_detection/ssd_resnet34/quantization/ptq/fx/python/__init__.py rename to examples/pytorch/multimodal-modeling/quantization/auto_round/__init__.py diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/README.md b/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/README.md similarity index 100% rename from examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/README.md rename to examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/README.md diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/requirements.txt b/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/requirements.txt similarity index 100% rename from 
examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/requirements.txt rename to examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/requirements.txt diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_benchmark.sh b/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_benchmark.sh rename to examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_quant.sh b/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_quant.sh rename to examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/run_quant.sh diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/setup.sh b/examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/setup.sh similarity index 100% rename from examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/llama4/setup.sh rename to examples/pytorch/multimodal-modeling/quantization/auto_round/llama4/setup.sh diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/mllm.py b/examples/pytorch/multimodal-modeling/quantization/auto_round/mllm.py similarity index 100% rename from examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/mllm.py rename to examples/pytorch/multimodal-modeling/quantization/auto_round/mllm.py diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/requirements.txt b/examples/pytorch/multimodal-modeling/quantization/auto_round/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/requirements.txt rename to examples/pytorch/multimodal-modeling/quantization/auto_round/requirements.txt diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/run_benchmark.sh b/examples/pytorch/multimodal-modeling/quantization/auto_round/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/run_benchmark.sh rename to examples/pytorch/multimodal-modeling/quantization/auto_round/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/run_quant.sh b/examples/pytorch/multimodal-modeling/quantization/auto_round/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/multimodal-modeling/quantization/auto_round/run_quant.sh rename to examples/pytorch/multimodal-modeling/quantization/auto_round/run_quant.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/README.md similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/README.md rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/README.md diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/quantize.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/quantize.py similarity index 100% 
rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/quantize.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/quantize.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/requirement.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/requirement.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/requirement.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/requirement.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/run_benchmark.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/run_quant.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/run_quant.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/fp8_quant/run_quant.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/README.md similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/README.md rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/README.md diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/quantize.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/quantize.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/quantize.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/quantize.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.1-8B-Instruct_7bits.json b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.1-8B-Instruct_7bits.json similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.1-8B-Instruct_7bits.json rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.1-8B-Instruct_7bits.json diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.3-70B-Instruct_5bits.json b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.3-70B-Instruct_5bits.json similarity index 100% rename from 
examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.3-70B-Instruct_5bits.json rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/recipes/Meta-Llama-3.3-70B-Instruct_5bits.json diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/requirements.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/requirements.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/run_hf_inf.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/run_hf_inf.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/run_hf_inf.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mix-precision/run_hf_inf.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/README.md similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/README.md rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/README.md diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/requirements.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/mx_quant/run_clm_no_trainer.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/README.md similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/README.md rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/README.md diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/requirements.txt diff 
--git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_benchmark.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_clm_no_trainer.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_quant.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_quant.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/run_quant.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/utils.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/utils.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/utils.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/smooth_quant/utils.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/README.md similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/README.md rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/README.md diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/requirements.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_benchmark.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py 
b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_clm_no_trainer.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_quant.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_quant.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/run_quant.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/utils.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/utils.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/utils.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/ipex/utils.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/README.md similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/README.md rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/README.md diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/requirements.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_benchmark.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_clm_no_trainer.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_quant.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_quant.sh 
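The `static_quant/ipex` and `static_quant/pt2e` language-modeling examples renamed above cover INT8 static quantization of LLMs. Their scripts are moved without content changes in this patch, so as an independent illustration only, here is a hedged sketch of static quantization with the 3.x torch API; the `StaticQuantConfig` entry point is an assumption, and the flow simply follows the same prepare/calibrate/convert shape as the SmoothQuant example above on a toy model:

```python
# Hedged sketch of INT8 static quantization with the 3.x torch API. StaticQuantConfig is an
# assumed entry point (the renamed example scripts are not shown in this hunk); model and
# calibration data are toy placeholders.
import torch
from neural_compressor.torch.quantization import StaticQuantConfig, convert, prepare

model = torch.nn.Sequential(torch.nn.Linear(128, 128), torch.nn.ReLU(), torch.nn.Linear(128, 2))
example_inputs = (torch.randn(8, 128),)

quant_config = StaticQuantConfig()           # defaults; per-op tuning knobs omitted
prepared = prepare(model, quant_config=quant_config, example_inputs=example_inputs)
for _ in range(20):                          # calibration pass over representative data
    prepared(torch.randn(8, 128))
q_model = convert(prepared)
```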
similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_quant.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/static_quant/pt2e/run_quant.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/README.md similarity index 97% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/README.md rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/README.md index a0788a596d1..e0d8e3a8312 100644 --- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/README.md +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/README.md @@ -1,6 +1,6 @@ # Step-by-Step We provide a Transformers-like API for model compression using the `WeightOnlyQuant` with `Rtn/Awq/Teq/GPTQ/AutoRound` algorithms, besides we provide use ipex to use intel extension for pytorch to accelerate the model. -We provide the inference benchmarking script `run_generation.py` for large language models, the default search algorithm is beam search with `num_beams = 4`. [Here](./llm_quantization_recipes.md) are some well accuracy and performance optimized models we validated, more models are working in progress. +We provide the inference benchmarking script `run_generation.py` for large language models, the default search algorithm is beam search with `num_beams = 4`. [Here](llm_quantization_recipes.md) are some well accuracy and performance optimized models we validated, more models are working in progress. # Quantization for CPU device diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md similarity index 97% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md index ad3430cc5bf..81d7f906cc7 100644 --- a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md +++ b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/llm_quantization_recipes.md @@ -2,7 +2,7 @@ This document describes the step-by-step instructions to run large language models (LLMs) on 5th Gen Intel® Xeon® Scalable Processor (codenamed Emerald Rapids) with [PyTorch](https://pytorch.org/). -The scripts [run_generation_cpu_woq.py](./run_generation_cpu_woq.py) provide Weight-Only Quantization based on [Intel® Neural Compressor](https://github.com/intel/neural-compressor) and return last word prediction accuracy by [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/master). 
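The README text in this hunk describes the Transformers-like weight-only quantization API (`Rtn/Awq/Teq/GPTQ/AutoRound`) and a `run_generation.py` benchmark whose default search is beam search with `num_beams = 4`. As a rough illustration of that workflow, here is a hedged sketch using the `neural_compressor.transformers` entry points; the class names (`AutoModelForCausalLM`, `RtnConfig`) are assumptions based on the public 3.x Transformers-like API rather than excerpts from the renamed scripts, and the checkpoint id is a placeholder:

```python
# Hedged sketch of the Transformers-like weight-only quantization workflow described in the
# README above. AutoModelForCausalLM/RtnConfig are assumed 3.x API names; the model id is a
# small placeholder checkpoint.
from transformers import AutoTokenizer
from neural_compressor.transformers import AutoModelForCausalLM, RtnConfig

model_id = "facebook/opt-125m"
woq_config = RtnConfig(bits=4, group_size=128)     # RTN 4-bit weight-only settings
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=woq_config)
tokenizer = AutoTokenizer.from_pretrained(model_id)

inputs = tokenizer("Once upon a time", return_tensors="pt")
# Beam search with num_beams=4, matching the README's stated default for run_generation.py.
outputs = model.generate(**inputs, max_new_tokens=32, num_beams=4)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```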
+The scripts [run_generation_cpu_woq.py](run_generation_cpu_woq.py) provide Weight-Only Quantization based on [Intel® Neural Compressor](https://github.com/intel/neural-compressor) and return last word prediction accuracy by [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness/tree/master). # Validated Models diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_GPU.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_GPU.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_GPU.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_GPU.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_cpu_woq.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_cpu_woq.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_cpu_woq.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/requirements_cpu_woq.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_benchmark.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_cpu_woq.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_generation_gpu_woq.py diff --git 
a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_tuning.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_tuning.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_tuning.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/transformers/weight_only/text-generation/run_tuning.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/README.md diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements-autoround-hpu.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements-autoround-hpu.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements-autoround-hpu.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements-autoround-hpu.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/requirements.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_clm_no_trainer.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/run_quant.sh diff --git 
a/examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/utils.py b/examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/utils.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/utils.py rename to examples/pytorch/nlp/huggingface_models/language-modeling/quantization/weight_only/utils.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/README.md b/examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/README.md similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/README.md rename to examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/README.md diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/requirements.txt b/examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/requirements.txt similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/requirements.txt rename to examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/requirements.txt diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_benchmark.sh b/examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_benchmark.sh similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_benchmark.sh rename to examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_benchmark.sh diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_qa.py b/examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_qa.py similarity index 100% rename from examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_qa.py rename to examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_qa.py diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_quant.sh b/examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_quant.sh similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/run_quant.sh rename to examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/run_quant.sh diff --git a/examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/trainer_qa.py b/examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/trainer_qa.py similarity index 100% rename from examples/pytorch/nlp/huggingface_models/question-answering/quantization/ptq_static/ipex/trainer_qa.py rename to examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/trainer_qa.py diff --git a/examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/utils_qa.py b/examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/utils_qa.py similarity index 100% 
rename from examples/3.x_api/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/utils_qa.py rename to examples/pytorch/nlp/huggingface_models/question-answering/quantization/static_quant/ipex/utils_qa.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/CODE_OF_CONDUCT.md b/examples/pytorch/recommendation/dlrm/static_quant/ipex/CODE_OF_CONDUCT.md similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/CODE_OF_CONDUCT.md rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/CODE_OF_CONDUCT.md diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/CONTRIBUTING.md b/examples/pytorch/recommendation/dlrm/static_quant/ipex/CONTRIBUTING.md similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/CONTRIBUTING.md rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/CONTRIBUTING.md diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/LICENSE b/examples/pytorch/recommendation/dlrm/static_quant/ipex/LICENSE similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/LICENSE rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/LICENSE diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/README.md b/examples/pytorch/recommendation/dlrm/static_quant/ipex/README.md similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/README.md rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/README.md diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/data_loader_terabyte.py b/examples/pytorch/recommendation/dlrm/static_quant/ipex/data_loader_terabyte.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/data_loader_terabyte.py rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/data_loader_terabyte.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/data_utils.py b/examples/pytorch/recommendation/dlrm/static_quant/ipex/data_utils.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/data_utils.py rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/data_utils.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_data_pytorch.py b/examples/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_data_pytorch.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/dlrm_data_pytorch.py rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_data_pytorch.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_s_pytorch.py b/examples/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_s_pytorch.py similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_s_pytorch.py rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/dlrm_s_pytorch.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/extend_distributed.py b/examples/pytorch/recommendation/dlrm/static_quant/ipex/extend_distributed.py similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/extend_distributed.py rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/extend_distributed.py diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/requirements.txt 
b/examples/pytorch/recommendation/dlrm/static_quant/ipex/requirements.txt similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/requirements.txt rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/requirements.txt diff --git a/examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_benchmark.sh b/examples/pytorch/recommendation/dlrm/static_quant/ipex/run_benchmark.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm/static_quant/ipex/run_benchmark.sh rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/run_benchmark.sh diff --git a/examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/run_quant.sh b/examples/pytorch/recommendation/dlrm/static_quant/ipex/run_quant.sh old mode 100755 new mode 100644 similarity index 100% rename from examples/pytorch/recommendation/dlrm/quantization/ptq/ipex/run_quant.sh rename to examples/pytorch/recommendation/dlrm/static_quant/ipex/run_quant.sh diff --git a/examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/README.md b/examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/README.md similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/README.md rename to examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/README.md diff --git a/examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/__init__.py b/examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/__init__.py similarity index 100% rename from examples/pytorch/object_detection/yolo_v3/quantization/ptq_static/fx/utils/__init__.py rename to examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/__init__.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/dlrm_dataloader.py b/examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/dlrm_dataloader.py similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/dlrm_dataloader.py rename to examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/dlrm_dataloader.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/multi_hot_criteo.py b/examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/multi_hot_criteo.py similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/multi_hot_criteo.py rename to examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/data_process/multi_hot_criteo.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/dlrm_model.py b/examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/dlrm_model.py similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/dlrm_model.py rename to examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/dlrm_model.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/main.py b/examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/main.py similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/main.py rename to examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/main.py diff --git a/examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/requirements.txt b/examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/requirements.txt similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/requirements.txt rename to 
examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/requirements.txt diff --git a/examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/setup.sh b/examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/setup.sh similarity index 100% rename from examples/3.x_api/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/setup.sh rename to examples/pytorch/recommendation/dlrm_v2/fp8_quant/cpu/setup.sh diff --git a/examples/tensorflow/graph_networks/graphsage/quantization/ptq/README.md b/examples/tensorflow/graph_networks/graphsage/quantization/ptq/README.md index aed9c1c2f34..057b3559756 100644 --- a/examples/tensorflow/graph_networks/graphsage/quantization/ptq/README.md +++ b/examples/tensorflow/graph_networks/graphsage/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step (Deprecated) +Step-by-Step ============ This document is used to list steps of reproducing TensorFlow Object Detection models tuning results. This example can run on Intel CPUs and GPUs. @@ -61,18 +61,6 @@ unzip ppi.zip # Run -## Quantization Config - -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` - ## 1. Quantization ```shell @@ -99,23 +87,18 @@ For graphsage, we applied the latter one because our philosophy is to enable the After prepare step is done, we just need update main.py like below. ```python if args.tune: - from neural_compressor import quantization - from neural_compressor.data import DataLoader - from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + from neural_compressor.tensorflow.utils import BaseDataLoader + dataset = CustomDataset() - calib_dataloader=DataLoader(framework='tensorflow', dataset=dataset, \ - batch_size=1, collate_fn = collate_function) - conf = PostTrainingQuantConfig() - q_model = quantization.fit(args.input_graph, conf=conf, \ - calib_dataloader=calib_dataloader, eval_func=evaluate) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=1, collate_fn=collate_function) + quant_config = StaticQuantConfig() + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) q_model.save(args.output_graph) if args.benchmark: if args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig() - fit(args.input_graph, conf, b_func=evaluate) + evaluate(args.input_graph) elif args.mode == 'accuracy': acc_result = evaluate(args.input_graph) print("Batch size = %d" % args.batch_size) diff --git a/examples/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py b/examples/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py index 3e237dd972e..e2a1d28d7d7 100644 --- a/examples/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py +++ b/examples/tensorflow/graph_networks/graphsage/quantization/ptq/dataloader.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
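For orientation, the quantization path that the graphsage README hunk above migrates to can be condensed into a short sketch. This is a minimal illustration only: the `.pb` paths and the calibration dataset below are placeholders standing in for the example's own `CustomDataset`, `collate_function` and PPI data from `main.py`; the API calls themselves mirror the ones added in the hunk.

```python
# Minimal sketch of the 3.x static PTQ flow shown in the hunk above; the .pb
# paths and calibration samples are placeholders for the PPI-based pipeline.
import numpy as np
from neural_compressor.common import set_random_seed
from neural_compressor.tensorflow import StaticQuantConfig, quantize_model
from neural_compressor.tensorflow.utils import BaseDataLoader


class CalibDataset:
    """Stand-in for the example's CustomDataset: yields (features, label) pairs."""
    def __init__(self, num_samples=10, feature_dim=50):
        self.samples = [(np.random.rand(feature_dim).astype(np.float32), 0)
                        for _ in range(num_samples)]

    def __getitem__(self, idx):
        return self.samples[idx]

    def __len__(self):
        return len(self.samples)


set_random_seed(9527)
calib_dataloader = BaseDataLoader(dataset=CalibDataset(), batch_size=1)

quant_config = StaticQuantConfig()               # CPU-friendly defaults
q_model = quantize_model("graphsage_fp32.pb",    # placeholder frozen graph
                         quant_config,
                         calib_dataloader)
q_model.save("graphsage_int8.pb")
```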
diff --git a/examples/tensorflow/graph_networks/graphsage/quantization/ptq/main.py b/examples/tensorflow/graph_networks/graphsage/quantization/ptq/main.py index 51a9e0fd384..b66e96657f3 100644 --- a/examples/tensorflow/graph_networks/graphsage/quantization/ptq/main.py +++ b/examples/tensorflow/graph_networks/graphsage/quantization/ptq/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -58,7 +58,7 @@ def prepare_Dataset(): data_location = args.dataset_location pretrained_model = args.input_graph - data = dataloader.load_data(prefix=data_location+'/ppi') + data = dataloader.load_data(prefix=data_location + '/ppi') G = data[0] features = data[1] id_map = data[2] @@ -70,13 +70,13 @@ def prepare_Dataset(): context_pairs = data[3] placeholders = utils.construct_placeholders(num_classes) - minibatch = utils.NodeMinibatchIterator(G, - id_map, - placeholders, - class_map, - num_classes, - batch_size=args.batch_size, - context_pairs = context_pairs) + minibatch = utils.NodeMinibatchIterator(G, + id_map, + placeholders, + class_map, + num_classes, + batch_size=args.batch_size, + context_pairs = context_pairs) return minibatch class CustomDataset(object): @@ -110,7 +110,7 @@ def evaluate(model): Returns: accuracy (float): evaluation result, the larger is better. """ - from neural_compressor.model import Model + from neural_compressor.tensorflow import Model model = Model(model) output_tensor = model.output_tensor if len(model.output_tensor)>1 else \ model.output_tensor[0] @@ -163,27 +163,22 @@ class eval_graphsage_optimized_graph: def run(self): """This is neural_compressor function include tuning, export and benchmark option.""" - from neural_compressor import set_random_seed + from neural_compressor.common import set_random_seed set_random_seed(9527) if args.tune: - from neural_compressor import quantization - from neural_compressor.data import DataLoader - from neural_compressor.config import PostTrainingQuantConfig + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model + from neural_compressor.tensorflow.utils import BaseDataLoader + dataset = CustomDataset() - calib_dataloader=DataLoader(framework='tensorflow', dataset=dataset, \ - batch_size=1, collate_fn = collate_function) - conf = PostTrainingQuantConfig() - q_model = quantization.fit(args.input_graph, conf=conf, \ - calib_dataloader=calib_dataloader, eval_func=evaluate) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=1, collate_fn=collate_function) + quant_config = StaticQuantConfig() + q_model = quantize_model(args.input_graph, quant_config, calib_dataloader) q_model.save(args.output_graph) if args.benchmark: if args.mode == 'performance': - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig() - fit(args.input_graph, conf, b_func=evaluate) + evaluate(args.input_graph) elif args.mode == 'accuracy': acc_result = evaluate(args.input_graph) print("Batch size = %d" % args.batch_size) diff --git a/examples/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py b/examples/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py index 7ff4311b80d..babe7146f5c 100644 --- a/examples/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py +++ 
b/examples/tensorflow/graph_networks/graphsage/quantization/ptq/utils.py @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md b/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md rename to examples/tensorflow/image_recognition/inception_v3/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py b/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py rename to examples/tensorflow/image_recognition/inception_v3/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py b/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py rename to examples/tensorflow/image_recognition/inception_v3/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt b/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt rename to examples/tensorflow/image_recognition/inception_v3/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/run_quant.sh b/examples/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_152/quantization/ptq/run_quant.sh rename to examples/tensorflow/image_recognition/inception_v3/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md b/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md rename to examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py b/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py rename to 
examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py b/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py rename to examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt b/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt rename to examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/run_quant.sh b/examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/resnet_v2_50/quantization/ptq/run_quant.sh rename to examples/tensorflow/image_recognition/mobilenet_v2/quantization/ptq/run_quant.sh diff --git a/examples/keras/image_recognition/prepare_dataset.sh b/examples/tensorflow/image_recognition/prepare_dataset.sh similarity index 100% rename from examples/keras/image_recognition/prepare_dataset.sh rename to examples/tensorflow/image_recognition/prepare_dataset.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md b/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md rename to examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py b/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py rename to examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py b/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py rename to examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt b/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt rename to 
examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/run_quant.sh b/examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg16/quantization/ptq/run_quant.sh rename to examples/tensorflow/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md b/examples/tensorflow/image_recognition/vgg16/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/README.md rename to examples/tensorflow/image_recognition/vgg16/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py b/examples/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py rename to examples/tensorflow/image_recognition/vgg16/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py b/examples/tensorflow/image_recognition/vgg16/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/main.py rename to examples/tensorflow/image_recognition/vgg16/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt b/examples/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt rename to examples/tensorflow/image_recognition/vgg16/quantization/ptq/requirements.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/image_recognition/vgg16/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/run_quant.sh b/examples/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vgg19/quantization/ptq/run_quant.sh rename to examples/tensorflow/image_recognition/vgg16/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md b/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md rename to 
examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/README.md diff --git a/examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/__init__.py b/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py similarity index 100% rename from examples/tensorflow/nlp/bert_base_mrpc/quantization/ptq/__init__.py rename to examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/__init__.py diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py b/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py rename to examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py b/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py rename to examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt b/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt rename to examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh b/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/run_quant.sh b/examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/vision_transformer/quantization/ptq/run_quant.sh rename to examples/tensorflow/image_recognition/vision_transformer/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/build_imagenet_data.py b/examples/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/build_imagenet_data.py rename to examples/tensorflow/keras/image_recognition/imagenet_prepare/build_imagenet_data.py diff --git a/examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/download_and_convert_imagenet.sh b/examples/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/download_and_convert_imagenet.sh rename to examples/tensorflow/keras/image_recognition/imagenet_prepare/download_and_convert_imagenet.sh diff --git 
a/examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/download_imagenet.sh b/examples/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/download_imagenet.sh rename to examples/tensorflow/keras/image_recognition/imagenet_prepare/download_imagenet.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt b/examples/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt rename to examples/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_lsvrc_2015_synsets.txt diff --git a/examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/imagenet_metadata.txt b/examples/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt similarity index 100% rename from examples/tensorflow/image_recognition/tensorflow_models/imagenet_prepare/imagenet_metadata.txt rename to examples/tensorflow/keras/image_recognition/imagenet_prepare/imagenet_metadata.txt diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md rename to examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py rename to examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py rename to examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py similarity index 97% rename from examples/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py rename to examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py index fc6119d5c50..abf63dc93b4 100644 --- a/examples/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py +++ b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/prepare_model.py @@ -1,35 +1,35 @@ -# -# -*- coding: utf-8 -*- -# -# Copyright (c) 2022 Intel Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import argparse -from tensorflow.keras.applications.inception_v3 import InceptionV3 -def get_inception_v3_model(saved_path): - model = InceptionV3(weights='imagenet') - model.save(saved_path) - -if __name__ == "__main__": - parser = argparse.ArgumentParser( - description='Export pretained keras model', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - parser.add_argument( - '--output_model', - type=str, - help='path to exported model file') - - args = parser.parse_args() - get_inception_v3_model(args.output_model) +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import argparse +from tensorflow.keras.applications.inception_v3 import InceptionV3 +def get_inception_v3_model(saved_path): + model = InceptionV3(weights='imagenet') + model.save(saved_path) + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description='Export pretained keras model', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser.add_argument( + '--output_model', + type=str, + help='path to exported model file') + + args = parser.parse_args() + get_inception_v3_model(args.output_model) diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt rename to examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/resnetv2_101/quantization/ptq/run_benchmark.sh b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/resnetv2_101/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/run_quant.sh b/examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/resnetv2_50/quantization/ptq/run_quant.sh rename to examples/tensorflow/keras/image_recognition/inception_v3/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/image_recognition/tensorflow_models/prepare_dataset.sh b/examples/tensorflow/keras/image_recognition/prepare_dataset.sh similarity index 100% rename from 
examples/tensorflow/image_recognition/tensorflow_models/prepare_dataset.sh rename to examples/tensorflow/keras/image_recognition/prepare_dataset.sh diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md b/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md rename to examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py b/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py rename to examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py b/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py rename to examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/main.py diff --git a/examples/keras/image_recognition/resnetv2_50/quantization/ptq/prepare_model.py b/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py similarity index 100% rename from examples/keras/image_recognition/resnetv2_50/quantization/ptq/prepare_model.py rename to examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/prepare_model.py diff --git a/examples/keras/image_recognition/xception/quantization/ptq/requirements.txt b/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt similarity index 100% rename from examples/keras/image_recognition/xception/quantization/ptq/requirements.txt rename to examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/requirements.txt diff --git a/examples/keras/image_recognition/xception/quantization/ptq/run_benchmark.sh b/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/keras/image_recognition/xception/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_benchmark.sh diff --git a/examples/keras/image_recognition/xception/quantization/ptq/run_quant.sh b/examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh similarity index 100% rename from examples/keras/image_recognition/xception/quantization/ptq/run_quant.sh rename to examples/tensorflow/keras/image_recognition/resnet_v2_50/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md index 15d84c7f32e..41a673fc834 100644 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md +++ b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step (Deprecated) +Step-by-Step ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning result of Intel® Model Zoo bert large model on squad 
v1.1 task. @@ -74,7 +74,7 @@ bash prepare_dataset.sh --output_dir=./data ### Convert the dataset to TF Record format After the dataset is downloaded by either of ways above, the dataset should be converted to files of TF Record format. ```shell -python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=./eval.tf_record +python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v1.1.json --output_file=data/eval.tf_record ``` # Run Command @@ -82,22 +82,11 @@ python create_tf_record.py --vocab_file=data/vocab.txt --predict_file=data/dev-v ## Quantization ```shell - bash run_quant.sh --input_model=./fp32_bert_squad.pb --output_model=./bert_squad_int8.pb --dataset_location=/path/to/evaluation/dataset + bash run_quant.sh --input_model=./fp32_bert_squad.pb --output_model=./bert_squad_int8.pb --dataset_location=data ``` -### Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` - ## Benchmark ```shell - bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=accuracy --dataset_location=/path/to/evaluation/dataset --batch_size=64 - bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=performance --dataset_location=/path/to/evaluation/dataset --batch_size=64 + bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=accuracy --dataset_location=data --batch_size=64 + bash run_benchmark.sh --input_model=./bert_squad_int8.pb --mode=performance --dataset_location=data --batch_size=64 ``` \ No newline at end of file diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py index 8fb20e36c59..8adecb971fd 100644 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py +++ b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_pretraining_data.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2022 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py index d24d701d953..12c6486283d 100644 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py +++ b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/create_tf_record.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2022 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -113,7 +113,7 @@ def __init__(self, self.is_impossible = is_impossible -def read_squad_examples(input_file, is_training): +def read_squad_examples(input_file, is_training=None): """Read a SQuAD json file into a list of SquadExample.""" with tf.io.gfile.GFile(input_file, "r") as reader: input_data = json.load(reader)["data"] @@ -196,8 +196,8 @@ def is_whitespace(c): def convert_examples_to_features(examples, tokenizer, max_seq_length, - doc_stride, max_query_length, is_training, - output_fn): + doc_stride, max_query_length, is_training=None, + output_fn=None): """Loads a data file into a list of `InputBatch`s.""" unique_id = 1000000000 diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/data_process.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/data_process.py rename to examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py rename to examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/main.py diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh index b367b93247b..aa8d269a79a 100644 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_benchmark.sh @@ -38,7 +38,7 @@ function init_params { # run_tuning function run_benchmark { - python tune_squad.py \ + python main.py \ --input_model=${input_model} \ --mode=${mode} \ --dataset_location=${dataset_location} \ diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh index bee4ecaf784..ddc30b40177 100644 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh +++ b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/run_quant.sh @@ -34,7 +34,7 @@ function init_params { # run_tuning function run_tuning { - python tune_squad.py \ + python main.py \ --input_model=${input_model} \ --output_model=${output_model} \ --dataset_location=${dataset_location} \ diff --git a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py index dbf083617f5..77c3175db07 100644 --- a/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py +++ b/examples/tensorflow/nlp/bert_large_squad_model_zoo/quantization/ptq/tokenization.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2022 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
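The relaxed defaults in the hunk above let evaluation-only callers skip the training-specific arguments. A small hypothetical usage snippet, assuming `create_tf_record.py` is importable from the example directory and the SQuAD dev set sits under `data/` as in the README:

```python
# Hypothetical usage of the relaxed signature: is_training now defaults to None,
# so evaluation-only callers can omit it (None behaves like "not training").
from create_tf_record import read_squad_examples  # example-local module

eval_examples = read_squad_examples(input_file="data/dev-v1.1.json")
print("Loaded %d SQuAD v1.1 evaluation examples" % len(eval_examples))
```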
diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md index 24b558ed032..7a8c22631e0 100644 --- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md +++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/README.md @@ -1,4 +1,4 @@ -Step-by-Step (Deprecated) +Step-by-Step ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor smooth quantization of language models gpt-j-6B. @@ -11,7 +11,7 @@ This document is used to list steps of reproducing TensorFlow Intel® Neural Com ```shell # Install Intel® Neural Compressor pip install neural-compressor -pip install -r requirements +pip install -r requirements.txt ``` ## 2. Prepare Pretrained model @@ -91,16 +91,6 @@ class MyDataloader: return self.length ``` -### Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -```python -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` ### Code Update After prepare step is done, we add the code for quantization tuning to generate quantized model. @@ -124,30 +114,24 @@ To apply quantization, the function that maps names from AutoTrackable variables Please use the recipe to set smooth quantization. ```python - from neural_compressor import quantization, PostTrainingQuantConfig + from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, autotune + from neural_compressor.tensorflow.quantization import TuningConfig + from neural_compressor.tensorflow.utils import BaseDataLoader + calib_dataloader = MyDataloader(mydata, batch_size=run_args.batch_size) - recipes = {"smooth_quant": True, "smooth_quant_args": {'alpha': 0.52705}} - conf = PostTrainingQuantConfig(quant_level=1, - excluded_precisions=["bf16"],##use basic tuning - recipes=recipes, - calibration_sampling_size=[1], - ) - + quant_config = [SmoothQuantConfig(alpha=0.52705), StaticQuantConfig(act_dtype="int8", weight_dtype="int8")] + tune_config = TuningConfig(config_set=quant_config, max_trials=1) model.weight_name_mapping = weight_name_mapping - q_model = quantization.fit( model, - conf, - eval_func=evaluate, - calib_dataloader=calib_dataloader) - + q_model = autotune(model, + tune_config, + eval_fn=evaluate, + calib_dataloader=calib_dataloader) q_model.save(run_args.output_model) ``` #### Benchmark ```python if run_args.mode == "performance": - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=run_args.iteration, cores_per_instance=4, num_of_instance=1) - fit(model, conf, b_func=evaluate) + evaluate(model.model) elif run_args.mode == "accuracy": acc_result = evaluate(model.model) print("Batch size = %d" % run_args.batch_size) diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py index 3de5c654d58..faf54b65bd0 100644 --- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py +++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -276,6 +276,10 @@ def evaluate(model, tf_eval_dataset=mydata): iteration = run_args.iteration correct = 0 latency_list = [] + from neural_compressor.tensorflow.utils import BaseModel + + if isinstance(model, BaseModel): + model = model.model infer = model.signatures["serving_default"] for idx, data in enumerate(tf_eval_dataset): input_ids = tf.convert_to_tensor([data[:-1]], dtype=tf.int32) @@ -316,35 +320,26 @@ def main(): with train_args.strategy.scope(): options = tf.data.Options() options.experimental_distribute.auto_shard_policy = tf.data.experimental.AutoShardPolicy.OFF - from neural_compressor import Model + from neural_compressor.tensorflow import Model model = Model(run_args.input_model, modelType='llm_saved_model') if run_args.tune: - from neural_compressor.config import AccuracyCriterion - from neural_compressor import quantization, PostTrainingQuantConfig + from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, autotune + from neural_compressor.tensorflow.quantization import TuningConfig + from neural_compressor.tensorflow.utils import BaseDataLoader calib_dataloader = MyDataloader(mydata, batch_size=run_args.batch_size) - recipes = {"smooth_quant": True, "smooth_quant_args": {'alpha': 0.52705}} - conf = PostTrainingQuantConfig(quant_level=1, - excluded_precisions=["bf16"],##use basic tuning - recipes=recipes, - calibration_sampling_size=[1], - accuracy_criterion=AccuracyCriterion() - ) - + quant_config = [SmoothQuantConfig(alpha=0.52705), StaticQuantConfig(act_dtype="int8", weight_dtype="int8")] + tune_config = TuningConfig(config_set=quant_config, max_trials=1) model.weight_name_mapping = weight_name_mapping - q_model = quantization.fit( model, - conf, - eval_func=evaluate, - calib_dataloader=calib_dataloader) - + q_model = autotune(model, + tune_config, + eval_fn=evaluate, + calib_dataloader=calib_dataloader) q_model.save(run_args.output_model) if run_args.benchmark: if run_args.mode == "performance": - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - conf = BenchmarkConfig(warmup=10, iteration=run_args.iteration, cores_per_instance=4, num_of_instance=1) - fit(model, conf, b_func=evaluate) + evaluate(model.model) elif run_args.mode == "accuracy": acc_result = evaluate(model.model) print("Batch size = %d" % run_args.batch_size) diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py index 12ac19cf4ba..cb4cd7f3f29 100644 --- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py +++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/prepare_model.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
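For quick reference, the migrated GPT-J flow above can be summarized in one place. This is a minimal sketch rather than a drop-in script: `weight_name_mapping`, `MyDataloader`, `mydata`, and `evaluate` are the objects defined in the example's `main.py`, and the model paths are placeholders.

```python
# Sketch of the 3.x TensorFlow smooth-quantization flow for GPT-J.
# Assumes weight_name_mapping, MyDataloader, mydata, and evaluate are defined
# as in the example's main.py; the paths below are placeholders.
from neural_compressor.tensorflow import Model, SmoothQuantConfig, StaticQuantConfig, autotune
from neural_compressor.tensorflow.quantization import TuningConfig

# Wrap the saved model; 'llm_saved_model' marks it as an LLM SavedModel.
model = Model("./gpt-j-6B", modelType="llm_saved_model")
model.weight_name_mapping = weight_name_mapping  # map AutoTrackable variable names to graph nodes

calib_dataloader = MyDataloader(mydata, batch_size=1)

# Combine smooth quant (alpha tuned for this model) with INT8 static quantization.
quant_config = [
    SmoothQuantConfig(alpha=0.52705),
    StaticQuantConfig(act_dtype="int8", weight_dtype="int8"),
]
tune_config = TuningConfig(config_set=quant_config, max_trials=1)

q_model = autotune(model, tune_config, eval_fn=evaluate, calib_dataloader=calib_dataloader)
q_model.save("./gpt-j-6B-int8")
```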
diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt index 186124490e2..23c79d8bbd3 100644 --- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt +++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/gpt-j/requirements.txt @@ -1,4 +1,4 @@ -tensorflow==2.12.1 +tensorflow==2.12 transformers -datasets +datasets==2.17 numpy \ No newline at end of file diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md index 386371f8eb5..fa45adbd5ef 100644 --- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md +++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/README.md @@ -1,4 +1,4 @@ -Step-by-Step (Deprecated) +Step-by-Step ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor quantization and smooth quantization of language models such as OPT and GPT2. diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py index acf3299689b..eda0d222d39 100644 --- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py +++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/benchmark.py @@ -1,3 +1,20 @@ +# +# -*- coding: utf-8 -*- +# +# Copyright (c) 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# import os.path import transformers import tensorflow as tf @@ -11,7 +28,7 @@ sys.path.insert(0, './') parser = argparse.ArgumentParser() -parser.add_argument('--int8', action='store_true', help="eval fp32 model or int8 model") +parser.add_argument('--optimized', action='store_true', help="eval fp32 model or optimized model") parser.add_argument('--model_name_or_path', type=str, default='facebook/opt-125m') parser.add_argument('--batch_size', type=int, default=16) parser.add_argument('--warmup', type=int, default=10) @@ -47,7 +64,6 @@ def get_attention_mask(self, input_ids): return tf.constant(1 - (input_ids==1).numpy().astype(int)) def evaluate_tf_v1(self, model): - # return 0.99 # TODO debug remove total, hit = 0, 0 index = 1 infer = model.signatures["serving_default"] @@ -157,16 +173,18 @@ def __len__(self): evaluator = Evaluator(eval_dataset, tokenizer, 'cpu') -if args.int8: - print("benchmarking int8 model") - int8_folder = model_name.split('/')[-1] + "_int8" - if not os.path.exists(int8_folder): - print(f"could not find int8 folder {int8_folder} ") +if args.optimized: + print("benchmarking optimized model") + optimized_folder = model_name.split('/')[-1] + "_int8" + if not os.path.exists(optimized_folder): + print(f"could not find optimized folder {optimized_folder} ") exit() - model = tf.saved_model.load(int8_folder) # tensorflow.python.trackable.autotrackable.AutoTrackable object + model = tf.saved_model.load(optimized_folder) # tensorflow.python.trackable.autotrackable.AutoTrackable object else: print("benchmaking fp32 model") - print("Benchmaking fp32 model is not longer supported in this example. please go to 3.x_api examples or raise issue to us.") - exit(0) + model = transformers.TFAutoModelForCausalLM.from_pretrained(model_name) + from neural_compressor.tensorflow import Model + + model = Model(model).model # tensorflow.python.trackable.autotrackable.AutoTrackable object evaluator.evaluate_tf_v1(model) diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py index b88cd9f7a09..8f012ceb404 100644 --- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py +++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/main.py @@ -1,7 +1,7 @@ # # -*- coding: utf-8 -*- # -# Copyright (c) 2023 Intel Corporation +# Copyright (c) 2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -28,10 +28,7 @@ parser = argparse.ArgumentParser() parser.add_argument('--sq', action='store_true', default=False, help="whether to use smooth quant") -# parser.add_argument('--calib_num', type=int, default=100, help="calibration num for sq") parser.add_argument('--model_name_or_path', type=str, default="facebook/opt-125m") -# TODO auto tuning not supported currently for TF backend -# parser.add_argument('--alpha', default=0.5, help="Set alpha=auto to use alpha tuning.") parser.add_argument('--alpha', type=float, default=0.5, help="alpha value for smoothing.") parser.add_argument('--log_frequency', type=int, default=100) parser.add_argument('--batch_size', type=int, default=16) @@ -39,58 +36,6 @@ parser.add_argument('--fallback_add', action='store_true', default=False, help="Whether to add fp32 fallback option" ) args = parser.parse_args() -class Evaluator: - def __init__(self, dataset, tokenizer, device, batch_size=args.batch_size): - self.dataset = dataset - self.tokenizer = tokenizer - self.device = device - self.dataloader = CustomDataloader(dataset, tokenizer, batch_size, device) - - def evaluate(self, model): - # model.eval() - # The task is to predict the last word of the input. - total, hit = 0, 0 - index = 1 - for input_ids, label, label_indices in tqdm(self.dataloader): - # TFCausalLMOutputWithPast len: 2 - # first element shape (16, 196, 50272) - # second element shape (16, 12, 196, 64) - outputs = model(input_ids) - last_token_logits = outputs[0].numpy()[np.arange(len(label_indices)), label_indices, :] - pred = last_token_logits.argmax(axis=-1) - total += label.shape[0] - hit += (pred == label.numpy()).sum().item() - if index % args.log_frequency == 0: - print(hit / total, flush=True) - index += 1 - acc = hit / total - print(acc, flush=True) - return acc - - def get_attention_mask(self, input_ids): - return tf.constant(1 - (input_ids==1).numpy().astype(int)) - - def evaluate_tf_v1(self, model): - # return 0.99 # TODO debug remove - total, hit = 0, 0 - index = 1 - infer = model.signatures["serving_default"] - for input_ids, label, label_indices in tqdm(self.dataloader): - attention_mask = self.get_attention_mask(input_ids) - input_ids = tf.constant(input_ids.numpy(), dtype=infer.inputs[0].dtype) - attention_mask = tf.constant(attention_mask.numpy(), dtype=infer.inputs[0].dtype) - results = infer(input_ids=input_ids, attention_mask=attention_mask) # len: 25 Identity: [16, 196, 50272], Identity_1: [16, 12, 196, 64] - last_token_logits = results['Identity'].numpy()[np.arange(len(label_indices)), label_indices, :] - pred = last_token_logits.argmax(axis=-1) - total += label.shape[0] - hit += (pred == label.numpy()).sum().item() - if index % args.log_frequency == 0: - print(hit / total, flush=True) - index += 1 - acc = hit / total - print(acc, flush=True) - return acc - class CustomDataloader: # for_calib=True in quantization, only input_id is needed, =False in evaluation need label def __init__(self, dataset, tokenizer, batch_size=1, device='cpu', for_calib=False): @@ -127,13 +72,9 @@ def pad_input(self, input): # input: a record def __iter__(self): if self.for_calib: labels = None - # label_indices = None for idx, record in enumerate(self.dataset): input_id, label, label_index = self.pad_input(record) attention_mask = self.get_attention_mask(input_id) - # compose attention_mask and input_id together - # during the calibration, it requires to yield a - # cur_input = tf.constant(np.append(attention_mask, input_id.numpy(), axis=0)) cur_input = {"input_ids": input_id.numpy(), 
"attention_mask": attention_mask} assert self.batch_size == 1 yield (cur_input, label) @@ -169,42 +110,31 @@ def __len__(self): tokenizer = transformers.AutoTokenizer.from_pretrained(model_name) model = transformers.TFAutoModelForCausalLM.from_pretrained(model_name) -eval_dataset = load_dataset('lambada', split='validation') - -# model.eval() -evaluator = Evaluator(eval_dataset, tokenizer, 'cpu') - -calib_dataset = load_dataset('lambada', split='train') -# calib_dataset = eval_dataset # TODO for debug +calib_dataset = load_dataset('lambada', split='validation') calib_dataset = calib_dataset.shuffle(seed=42) calib_dataloader = CustomDataloader(calib_dataset, tokenizer, device='cpu', batch_size=1, for_calib=True) -def eval_func(model): - acc = evaluator.evaluate_tf_v1(model) - return acc +from neural_compressor.tensorflow import StaticQuantConfig, SmoothQuantConfig, quantize_model -from neural_compressor import PostTrainingQuantConfig -from neural_compressor.config import AccuracyCriterion -from neural_compressor import quantization +ptq_config = None +quant_config = [] -recipes = {} if args.sq: - recipes = {"smooth_quant": True, "smooth_quant_args": {'alpha': args.alpha}} -op_type_dict = {} + quant_config.append(SmoothQuantConfig(alpha=args.alpha)) if args.kl: - op_type_dict = {'linear': {'activation': {'algorithm': ['kl']}}} + ptq_config = StaticQuantConfig(act_dtype="int8", weight_dtype="int8", act_algorithm="kl") if args.fallback_add: - op_type_dict["add"] = {"weight": {"dtype": ["fp32"]}, "activation": {"dtype": ["fp32"]}} -conf = PostTrainingQuantConfig(quant_level=1, excluded_precisions=["bf16"],##use basic tuning - recipes=recipes, - op_type_dict=op_type_dict, accuracy_criterion=AccuracyCriterion( -tolerable_loss=0.011, # TODO remove for debug -)) + ptq_config = StaticQuantConfig(act_dtype="int8", weight_dtype="int8") + ptq_config.set_local("Add", StaticQuantConfig(act_dtype="fp32", weight_dtype="fp32")) + +if not ptq_config: + ptq_config = StaticQuantConfig(act_dtype="int8", weight_dtype="int8") +quant_config.append(ptq_config) + +q_model = quantize_model(model, + quant_config, + calib_dataloader=calib_dataloader) -q_model = quantization.fit(model, - conf, - calib_dataloader=calib_dataloader, - eval_func=eval_func) save_model_name = model_name.split("/")[-1] q_model.save(f"{save_model_name}_int8") diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt index fb574f91ff5..8e5871fe7fd 100644 --- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt +++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/requirements.txt @@ -1,3 +1,3 @@ -tensorflow +tensorflow==2.15 datasets -transformers \ No newline at end of file +transformers==4.52.1 \ No newline at end of file diff --git a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh index b8fad17eebd..fed1991e4ca 100644 --- a/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh +++ b/examples/tensorflow/nlp/large_language_models/quantization/ptq/smoothquant/run_benchmark.sh @@ -10,7 +10,7 @@ function main { # init params function init_params { - int8=false + optimized=false batch_size=16 for var in "$@" do @@ -18,8 +18,8 @@ function init_params { --input_model=*) 
input_model=$(echo $var |cut -f2 -d=) ;; - --int8=*) - int8=$(echo $var |cut -f2 -d=) + --optimized=*) + optimized=$(echo $var |cut -f2 -d=) ;; --batch_size=*) batch_size=$(echo $var |cut -f2 -d=) @@ -31,11 +31,11 @@ function init_params { # run_tuning function run_benchmark { - if [[ "${int8}" == "true" ]]; then + if [[ "${optimized}" == "true" ]]; then python benchmark.py \ --model_name_or_path ${input_model} \ --batch_size ${batch_size} \ - --int8 + --optimized else python benchmark.py \ --model_name_or_path ${input_model} \ diff --git a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md index fb672168204..544e954371e 100644 --- a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md +++ b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step (Deprecated) +Step-by-Step ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of Transformer-LT. This example can run on Intel CPUs and GPUs. @@ -50,30 +50,30 @@ tar -zxvf transformer_lt_official_fp32_pretrained_model.tar.gz Dataset is in data folder, pretrained model is in graph folder. #### Automatic dataset & model download Run the `prepare_dataset_model.sh` script located in `examples/tensorflow/nlp/transformer_lt/quantization/ptq`. ```shell cd examples/tensorflow/nlp/transformer_lt/quantization/ptq bash prepare_dataset_model.sh ``` ## Run Command +### Quantization ```shell -python main.py --input_graph=/path/to/fp32_graphdef.pb --inputs_file=/path/to/newstest2014.en --reference_file=/path/to/newstest2014.de --vocab_file=/path/to/vocab.txt --tune +bash run_quant.sh --input_model=./model/fp32_graphdef.pb --dataset_location=./data --output_model=./model/int8_graphdef.pb +``` +### Benchmark +```shell +bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=performance + +bash run_benchmark.sh --input_model=./model/int8_graphdef.pb --dataset_location=./data --mode=accuracy --batch_size=1 ``` Details of enabling Intel® Neural Compressor on transformer-lt for Tensorflow. ========================= This is a tutorial of how to enable transformer-lt model with Intel® Neural Compressor. -## User Code Analysis -1. User specifies fp32 *model*, calibration dataset *q_dataloader*, evaluation dataset *eval_dataloader* and metric in tuning.metric field of model-specific yaml config file. - -2. User specifies fp32 *model*, calibration dataset *q_dataloader* and a custom *eval_func* which encapsulates the evaluation dataset and metric by itself. - -For transformer-lt, we applied the latter one because we don't have dataset and metric for transformer-lt. The task is to implement the *q_dataloader* and *eval_func*. - ### q_dataloader Part Adaption Below dataset class uses getitem to provide the model with input. @@ -96,19 +96,6 @@ class Dataset(object): ### Evaluation Part Adaption We evaluate the model with BLEU score, its source: https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py -### Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs.
If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - inputs=['input_tensor'], - outputs=['model/Transformer/strided_slice_19'], - ... - ) -``` - Here we set the input tensor and output tensors name into *inputs* and *outputs* args. In this case we calibrate and quantize the model, and use our calibration dataloader initialized from a 'Dataset' object. @@ -117,17 +104,16 @@ After prepare step is done, we add tune code to generate quantized model. #### Tune ```python - from neural_compressor import quantization - from neural_compressor.data import DataLoader - from neural_compressor.config import PostTrainingQuantConfig - ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) - calib_dataloader = DataLoader(dataset=ds, collate_fn=collate_fn, \ - batch_size=FLAGS.batch_size, framework='tensorflow') - conf = PostTrainingQuantConfig(inputs=['input_tensor'], - outputs=['model/Transformer/strided_slice_19'], - calibration_sampling_size=[500]) - q_model = quantization.fit(graph, conf=conf, calib_dataloader=calib_dataloader, - eval_func=eval_func) + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn) + + quant_config = StaticQuantConfig() + model = Model(graph) + model.input_tensor_names = ['input_tensor'] + model.output_tensor_names = ['model/Transformer/strided_slice_19'] + q_model = quantize_model(model, quant_config, calib_dataloader) try: q_model.save(FLAGS.output_model) except Exception as e: @@ -135,11 +121,10 @@ After prepare step is done, we add tune code to generate quantized model. ``` #### Benchmark ```python - from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - if FLAGS.mode == 'performance': - fit(graph, conf=BenchmarkConfig(), b_func=eval_func) - elif FLAGS.mode == 'accuracy': - eval_func(graph) + if FLAGS.benchmark: + assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ + "Benchmark only supports performance or accuracy mode." + acc = eval_func(graph) + if FLAGS.mode == 'accuracy': + print('Accuracy is {:.3f}'.format(acc)) ``` -The Intel® Neural Compressor quantization.fit() function will return a best quantized model under time constraint. 
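For quick reference, a minimal sketch that stitches the Transformer-LT pieces above together; `Dataset`, `collate_fn`, `graph`, and `FLAGS` are the objects defined in the example's `main.py`.

```python
# Sketch of the 3.x TensorFlow static quantization flow for Transformer-LT.
# Assumes Dataset, collate_fn, graph, and FLAGS come from the example's main.py.
from neural_compressor.tensorflow import Model, StaticQuantConfig, quantize_model
from neural_compressor.tensorflow.utils import BaseDataLoader

ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file)
calib_dataloader = BaseDataLoader(dataset=ds, batch_size=FLAGS.batch_size, collate_fn=collate_fn)

# Wrap the fp32 GraphDef and pin the calibration entry/exit tensors.
model = Model(graph)
model.input_tensor_names = ['input_tensor']
model.output_tensor_names = ['model/Transformer/strided_slice_19']

q_model = quantize_model(model, StaticQuantConfig(), calib_dataloader)
q_model.save(FLAGS.output_model)
```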
diff --git a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/main.py b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/main.py index 6d8b92d1317..58a93090e7a 100644 --- a/examples/tensorflow/nlp/transformer_lt/quantization/ptq/main.py +++ b/examples/tensorflow/nlp/transformer_lt/quantization/ptq/main.py @@ -30,7 +30,7 @@ from utils import metrics from utils import tokenizer from utils.tokenizer import Subtokenizer -from neural_compressor.data import DataLoader +from neural_compressor.tensorflow.utils import BaseDataLoader flags = tf.compat.v1.flags FLAGS = flags.FLAGS @@ -143,9 +143,7 @@ def eval_func(infer_graph, iteration=-1): 'model/Transformer/strided_slice_19:0') ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) - dataloader = DataLoader(framework='tensorflow', dataset=ds, - batch_size=FLAGS.batch_size, collate_fn=collate_fn) - + dataloader = BaseDataLoader(dataset=ds, batch_size=FLAGS.batch_size, collate_fn=collate_fn) config = tf.compat.v1.ConfigProto() config.use_per_session_threads = 1 config.inter_op_parallelism_threads = 1 @@ -189,7 +187,6 @@ def eval_func(infer_graph, iteration=-1): except: decode.append(subtokenizer.decode(otr)) bleu_eval.update(decode, labels) - print('Accuracy is {:.3f}'.format(bleu_eval.result())) return bleu_eval.result() class Dataset(object): @@ -235,16 +232,16 @@ def __len__(self): def main(_): graph = load_graph(FLAGS.input_graph) if FLAGS.tune: - from neural_compressor import quantization - from neural_compressor.config import PostTrainingQuantConfig - ds = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) - calib_dataloader = DataLoader(framework='tensorflow', dataset=ds, \ - batch_size=FLAGS.batch_size, collate_fn=collate_fn,) - conf = PostTrainingQuantConfig(inputs=['input_tensor'], - outputs=['model/Transformer/strided_slice_19'], - calibration_sampling_size=[500]) - q_model = quantization.fit(graph, conf=conf, calib_dataloader=calib_dataloader, - eval_func=eval_func) + from neural_compressor.tensorflow import StaticQuantConfig, quantize_model, Model + + dataset = Dataset(FLAGS.inputs_file, FLAGS.reference_file, FLAGS.vocab_file) + calib_dataloader = BaseDataLoader(dataset=dataset, batch_size=FLAGS.batch_size, collate_fn=collate_fn) + + quant_config = StaticQuantConfig() + model = Model(graph) + model.input_tensor_names = ['input_tensor'] + model.output_tensor_names = ['model/Transformer/strided_slice_19'] + q_model = quantize_model(model, quant_config, calib_dataloader) try: q_model.save(FLAGS.output_model) except Exception as e: @@ -253,13 +250,9 @@ def main(_): if FLAGS.benchmark: assert FLAGS.mode == 'performance' or FLAGS.mode == 'accuracy', \ "Benchmark only supports performance or accuracy mode." 
- from neural_compressor.benchmark import fit - from neural_compressor.config import BenchmarkConfig - if FLAGS.mode == 'performance': - conf = BenchmarkConfig(cores_per_instance=28, num_of_instance=1) - fit(graph, conf, b_func=eval_func) - elif FLAGS.mode == 'accuracy': - eval_func(graph) + acc = eval_func(graph) + if FLAGS.mode == 'accuracy': + print('Accuracy is {:.3f}'.format(acc)) if __name__ == "__main__": tf.compat.v1.app.run() diff --git a/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/__init__.py b/examples/tensorflow/nlp/transformer_lt_mlperf/quantization/ptq/utils/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md b/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md rename to examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py b/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py rename to examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/coco_tools.py diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py b/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py rename to examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py b/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py rename to examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/main.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh b/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh b/examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh rename to examples/tensorflow/object_detection/faster_rcnn_resnet50/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md b/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md similarity index 100% rename from 
examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md rename to examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py b/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py rename to examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/coco_tools.py diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py b/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py rename to examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py b/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py rename to examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/main.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh b/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/run_quant.sh b/examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/run_quant.sh rename to examples/tensorflow/object_detection/mask_rcnn_inception_v2/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/prepare_dataset.sh b/examples/tensorflow/object_detection/prepare_dataset.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/prepare_dataset.sh rename to examples/tensorflow/object_detection/prepare_dataset.sh diff --git a/examples/tensorflow/object_detection/yolo_v3/quantization/ptq/requirements.txt b/examples/tensorflow/object_detection/requirements.txt similarity index 100% rename from examples/tensorflow/object_detection/yolo_v3/quantization/ptq/requirements.txt rename to examples/tensorflow/object_detection/requirements.txt diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md b/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md rename to examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/README.md diff --git 
a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py b/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py rename to examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/coco_tools.py diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py b/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py rename to examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py b/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py rename to examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/main.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/prepare_model.py b/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/prepare_model.py rename to examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/prepare_model.py diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh b/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_benchmark.sh diff --git a/examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/run_quant.sh b/examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh similarity index 100% rename from examples/tensorflow/object_detection/tensorflow_models/ssd_resnet50_v1/quantization/ptq/run_quant.sh rename to examples/tensorflow/object_detection/ssd_mobilenet_v1/quantization/ptq/run_quant.sh diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md b/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md rename to examples/tensorflow/object_detection/yolo_v5/quantization/ptq/README.md diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py b/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py rename to examples/tensorflow/object_detection/yolo_v5/quantization/ptq/main.py diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh b/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh similarity index 100% rename from 
examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh rename to examples/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_dataset.sh diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh b/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh rename to examples/tensorflow/object_detection/yolo_v5/quantization/ptq/prepare_model.sh diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt b/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt rename to examples/tensorflow/object_detection/yolo_v5/quantization/ptq/requirements.txt diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh b/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh rename to examples/tensorflow/object_detection/yolo_v5/quantization/ptq/run_benchmark.sh diff --git a/examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh b/examples/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh similarity index 100% rename from examples/3.x_api/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh rename to examples/tensorflow/object_detection/yolo_v5/quantization/ptq/run_quant.sh diff --git a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md index 23ddfcaeb6a..7bff08a2f84 100644 --- a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md +++ b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step (Deprecated) +Step-by-Step ============ This document is used to list steps of reproducing TensorFlow Wide & Deep tuning zoo result. @@ -41,7 +41,7 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] ### Install Additional Dependency packages ```shell cd examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq pip install -r requirements.txt ``` @@ -87,17 +87,6 @@ Two .tfrecords files are generated and will be used later on: bash run_quant.sh --dataset_location=/path/to/datasets --input_model=/path/to/wide_deep_fp32_pretrained_model.pb --output_model=./wnd_int8_opt.pb ``` -### Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ...
- ) -``` - ## Benchmark ``` bash run_benchmark.sh --dataset_location=/path/to/datasets --input_model=./wnd_int8_opt.pb --mode=accuracy --batch_size=500 diff --git a/examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py rename to examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/main.py diff --git a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh index 85ae3005205..72ab01f2a19 100644 --- a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_benchmark.sh @@ -46,7 +46,7 @@ function define_mode { # run_tuning function run_benchmark { #numactl -N 0 -m 0 \ - python inference.py \ + python main.py \ --input_graph ${input_model} \ --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \ --batch_size ${batch_size} \ diff --git a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh index 685aad45c63..a8068917a27 100644 --- a/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh +++ b/examples/tensorflow/recommendation/wide_deep_large_ds/quantization/ptq/run_quant.sh @@ -35,7 +35,7 @@ function init_params { # run_tuning function run_tuning { - python inference.py \ + python main.py \ --input_graph ${input_model} \ --evaluation_data_location ${dataset_location}/eval_processed_data.tfrecords \ --calibration_data_location ${dataset_location}/train_processed_data.tfrecords \ diff --git a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md index ebb765aad7c..4307ec85480 100644 --- a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md +++ b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step (Deprecated) +Step-by-Step ============ This document is used to list steps of reproducing TensorFlow Intel® Neural Compressor tuning zoo result of 3dunet-mlperf. @@ -14,9 +14,9 @@ This example can run on Intel CPUs and GPUs. pip install neural-compressor ``` -### Install Intel Tensorflow +### Install requirements ```shell -pip install intel-tensorflow +pip install -r requirements.txt ``` > Note: Validated TensorFlow [Version](/docs/source/installation_guide.md#validated-software-environment). @@ -59,27 +59,18 @@ pip install --upgrade intel-extension-for-tensorflow[cpu] # Run command +Please set the following environment variables before running quantization or benchmark commands: -## Quantization - - -### Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. 
+* `export nnUNet_preprocessed=/build/preprocessed_data` +* `export nnUNet_raw_data_base=/build/raw_data` +* `export RESULTS_FOLDER=/build/result` -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` +## Quantization +`bash run_quant.sh --input_model=3dunet_dynamic_ndhwc.pb --dataset_location=/build --output_model=3dunet_dynamic_ndhwc_int8.pb` ## Benchmark -* `export nnUNet_preprocessed=/build/preprocessed_data` -* `export nnUNet_raw_data_base=/build/raw_data` -* `export RESULTS_FOLDER=/build/result` -* `pip install -r requirements.txt` -* `python run_accuracy.py --input-model= --data-location= --calib-preprocess= --iters=100 --batch-size=1 --mode=benchmark --bfloat16 0` +`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=100 --iters=500 --mode=benchmark` +`bash run_benchmark.sh --input_model=3dunet_dynamic_ndhwc_int8.pb --dataset_location=/build --batch_size=1 --mode=accuracy` diff --git a/examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py rename to examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/main.py diff --git a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt index 4e85853747e..d5069f8038f 100644 --- a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt +++ b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/requirements.txt @@ -1 +1,2 @@ nnunet +tensorflow \ No newline at end of file diff --git a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh index fd466c5f8d0..36f8d8502f0 100644 --- a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_benchmark.sh @@ -31,6 +31,9 @@ function init_params { --batch_size=*) batch_size=$(echo $var |cut -f2 -d=) ;; + --iters=*) + iters=$(echo $var |cut -f2 -d=) + ;; *) echo "Error: No such parameter: ${var}" exit 1 @@ -49,12 +52,13 @@ function run_benchmark { extra_cmd="" fi - python run_accuracy.py \ + python main.py \ --input-model=${input_model} \ --data-location=${dataset_location} \ --calib-preprocess=${BUILD_DIR}/calib_preprocess \ --batch-size=${batch_size} \ --mode=${mode} \ + --iters=${iters} \ ${extra_cmd} } diff --git a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh index 749b0c09c2b..79256545613 100644 --- a/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh +++ b/examples/tensorflow/semantic_image_segmentation/3dunet-mlperf/quantization/ptq/run_quant.sh @@ -37,7 +37,7 @@ function init_params { # run_tuning function run_tuning { - python run_accuracy.py \ + python main.py \ --input-model=${input_model} \ --output-model=${output_model} \ --data-location=${dataset_location} \ diff --git 
a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md index d7019887980..6fa291d0b36 100644 --- a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md +++ b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/README.md @@ -1,4 +1,4 @@ -Step-by-Step (Deprecated) +Step-by-Step ============ This document is used to list steps of reproducing TensorFlow style transfer Intel® Neural Compressor tuning zoo result. @@ -61,7 +61,7 @@ optional arguments: ```shell wget https://storage.googleapis.com/download.magenta.tensorflow.org/models/arbitrary_style_transfer.tar.gz -tar -xvzf arbitrary_style_transfer.tar.gz ./model +tar -xvzf arbitrary_style_transfer.tar.gz ``` ### 3. Prepare Dataset @@ -70,22 +70,12 @@ There are two folders named style_images and content_images in current folder. P # Run Command ```shell - python style_tune.py --output_dir=./result --style_images_paths=./style_images --content_images_paths=./content_images --input_model=./model/model.ckpt + python main.py --output_dir=./result --style_images_paths=./style_images --content_images_paths=./content_images --input_model=./model/model.ckpt ``` ## Quantization Config -The Quantization Config class has default parameters setting for running on Intel CPUs. If running this example on Intel GPUs, the 'backend' parameter should be set to 'itex' and the 'device' parameter should be set to 'gpu'. - -``` -config = PostTrainingQuantConfig( - device="gpu", - backend="itex", - ... - ) -``` - ## Quantization ```shell bash run_quant.sh --dataset_location=style_images/,content_images/ --input_model=./model/model.ckpt --output_model=saved_model @@ -119,13 +109,9 @@ Here we set the input tensor and output tensors name into *inputs* and *outputs* After prepare step is done, we just need add 2 lines to get the quantized model. ```python -from neural_compressor import quantization -from neural_compressor.config import PostTrainingQuantConfig -conf = PostTrainingQuantConfig(inputs=['style_input', 'content_input'], - outputs=['transformer/expand/conv3/conv/Sigmoid'], - calibration_sampling_size=[50, 100]) -quantized_model = quantization.fit(args.input_graph, conf=conf, calib_dataloader=dataloader, - eval_dataloader==dataloader) -``` +from neural_compressor.tensorflow import StaticQuantConfig, quantize_model -The Intel® Neural Compressor quantizer.fit() function will return a best quantized model during timeout constrain. 
+quant_config = StaticQuantConfig() +q_model = quantize_model(graph, quant_config, calib_dataloader) +q_model.save(FLAGS.output_model) +``` diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py similarity index 100% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py rename to examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/data_process.py diff --git a/examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py similarity index 100% rename from examples/3.x_api/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py rename to examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/main.py diff --git a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh index 08de33cba5b..41fee820958 100644 --- a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh +++ b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_benchmark.sh @@ -48,7 +48,7 @@ function run_benchmark { content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}') echo "$style_images, $content_images" - python style_tune.py \ + python main.py \ --input_model "${input_model}" \ --style_images_paths "${style_images}" \ --content_images_paths "${content_images}" \ diff --git a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh index d5adc1060bb..4fdfdd2e8a5 100644 --- a/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh +++ b/examples/tensorflow/style_transfer/arbitrary_style_transfer/quantization/ptq/run_quant.sh @@ -38,7 +38,7 @@ function run_tuning { content_images=$(echo ${dataset_location} | awk -F ',' '{print $2}') echo "$style_images, $content_images" - python style_tune.py \ + python main.py \ --input_model "${input_model}" \ --style_images_paths "${style_images}" \ --content_images_paths "${content_images}" \ diff --git a/neural_compressor/compression/pruner/README.md b/neural_compressor/compression/pruner/README.md index 07897fec4f1..cfe6f988c71 100644 --- a/neural_compressor/compression/pruner/README.md +++ b/neural_compressor/compression/pruner/README.md @@ -91,7 +91,7 @@ Pruning patterns defines the rules of pruned weights' arrangements in space. Int An advantage of channel pruning is that in some particular structure(feed forward parts in Transformers etc.), pruned channels can be removed permanently from original weights without influencing other dense channels. Via this process, we can decrease these weights' size and obtain direct improvements of inference speed, without using hardware related optimization tools like [Intel Extension for Transformers](https://github.com/intel/intel-extension-for-transformers). - We name this process as **Model Auto Slim**(experimental feature) and currently we have validated that this process can significantly improve some popular transformer model's inference speed. 
Currently this method is under development and only supports some particular structures. Please refer more details of such method in this [model slim example](../../../examples/pytorch/nlp/huggingface_models/question-answering/model_slim/). + We name this process **Model Auto Slim** (an experimental feature), and we have validated that it can significantly improve the inference speed of some popular transformer models. This method is still under development and only supports some particular structures. Please refer to this [model slim example](../../../examples/deprecated/pytorch/nlp/huggingface_models/question-answering/model_slim/) for more details. - Unstructured Pruning @@ -476,29 +476,29 @@ The pruning technique is validated on typical models across various domains (including CV and NLP). - Language Modeling - Sparsity is effectively implemented through various pruning patterns in Causal language modeling (CLM) tasks. [Language-modeling examples](../../../examples/pytorch/nlp/huggingface_models/language-modeling/pruning/eager). + Sparsity is effectively implemented through various pruning patterns in Causal language modeling (CLM) tasks. [Language-modeling examples](../../../examples/deprecated/pytorch/nlp/huggingface_models/language-modeling/pruning/eager). - Text Classification - Sparsity is implemented in different pruning patterns of MRPC and SST-2 tasks [Text-classification examples](../../../examples/pytorch/nlp/huggingface_models/text-classification/pruning/eager). + Sparsity is implemented in different pruning patterns of MRPC and SST-2 tasks [Text-classification examples](../../../examples/deprecated/pytorch/nlp/huggingface_models/text-classification/pruning/eager). - Question Answering - Multiple examples of sparse models were obtained on the SQuAD-v1.1 dataset [Question-answering examples](../../../examples/pytorch/nlp/huggingface_models/question-answering/pruning/eager). + Multiple examples of sparse models were obtained on the SQuAD-v1.1 dataset [Question-answering examples](../../../examples/deprecated/pytorch/nlp/huggingface_models/question-answering/pruning/eager). - Language Translation (Experimental, sparsity 0.8, pattern 4x1, BLEU 25.63(dense) vs 24.35(sparse)) - Pruning Flan-T5-small model on English-Romanian translation task [Translation examples](../../../examples/pytorch/nlp/huggingface_models/translation/pruning/eager). + Pruning Flan-T5-small model on English-Romanian translation task [Translation examples](../../../examples/deprecated/pytorch/nlp/huggingface_models/translation/pruning/eager). - Object Detection (Experimental, sparsity 0.8, pattern 4x1, mAP 0.404(dense) vs 0.381(sparse)) - Pruning on YOLOv5 model using coco dataset [Object-detection examples](../../../examples/pytorch/object_detection/yolo_v5/pruning/eager). + Pruning on YOLOv5 model using coco dataset [Object-detection examples](../../../examples/deprecated/pytorch/object_detection/yolo_v5/pruning/eager). - Image Recognition (Experimental, sparsity 0.75, pattern 2x1, top1 acc 0.801(dense) vs 0.7895(sparse)) - Pruning on ResNet50 model using ImageNet dataset [Image-recognition examples](../../../examples/pytorch/image_recognition/ResNet50/pruning/eager/). + Pruning on ResNet50 model using ImageNet dataset [Image-recognition examples](../../../examples/deprecated/pytorch/image_recognition/ResNet50/pruning/eager/). -Please refer to [pruning examples](../../../examples/README.md#Pruning-1) for more information.
+Please refer to [pruning examples](../../../examples/deprecated/README.md#Pruning-1) for more information. ## Sparse Model Deployment