From b29d2ec707601a041d8b3bc8220948e5c48d25be Mon Sep 17 00:00:00 2001
From: winskuo-quic <quic_winskuo@quicinc.com>
Date: Mon, 24 Mar 2025 16:28:02 +0800
Subject: [PATCH 1/2] Qualcomm AI Engine Direct - Flags for CI

---
 .ci/scripts/test_model.sh                     |  2 +-
 .../qualcomm/_passes/annotate_quant_attrs.py  | 46 +++++++------------
 examples/qualcomm/scripts/deeplab_v3.py       |  6 ++-
 examples/qualcomm/scripts/edsr.py             |  6 ++-
 examples/qualcomm/scripts/inception_v3.py     |  6 ++-
 examples/qualcomm/scripts/inception_v4.py     |  8 +++-
 examples/qualcomm/scripts/mobilenet_v2.py     |  6 ++-
 examples/qualcomm/scripts/mobilenet_v3.py     |  6 ++-
 examples/qualcomm/scripts/torchvision_vit.py  |  6 ++-
 examples/qualcomm/scripts/wav2letter.py       |  4 +-
 examples/qualcomm/utils.py                    |  7 +++
 11 files changed, 63 insertions(+), 40 deletions(-)

diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
index fa922e897d7..fa40f8343be 100755
--- a/.ci/scripts/test_model.sh
+++ b/.ci/scripts/test_model.sh
@@ -205,7 +205,7 @@ test_model_with_qnn() {
   # TODO(guangyang): Make QNN chipset matches the target device
   QNN_CHIPSET=SM8450
 
-  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS
+  "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS
   EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit)
 }
 
diff --git a/backends/qualcomm/_passes/annotate_quant_attrs.py b/backends/qualcomm/_passes/annotate_quant_attrs.py
index ed19a54b7e7..b4f14fc28cd 100644
--- a/backends/qualcomm/_passes/annotate_quant_attrs.py
+++ b/backends/qualcomm/_passes/annotate_quant_attrs.py
@@ -31,15 +31,12 @@
 class AnnotateQuantAttrs(ExportPass):
     """
     Add "quant_attrs" to graph nodes' meta from the QDQ information
-    generated after quatization process.
+    generated after quantization process.
     """
 
-    def __init__(
-        self, edge_program: torch.export.ExportedProgram, skip_advanced_requat: bool
-    ):
+    def __init__(self, edge_program: torch.export.ExportedProgram):
         super(AnnotateQuantAttrs, self).__init__()
         self.edge_program = edge_program
-        self.skip_advanced_requant = skip_advanced_requat
 
     def _annotate_source_nodes(
         self, quant_node: torch.fx.Node, quant_attrs: Dict[str, Any]
@@ -88,30 +85,21 @@ def _annotate_requant(self, n):
                 dq_attrs = get_quant_attrs(self.edge_program, dq_node)
                 # TODO: Store multiple pairs of requantize attributes when we have an op builder
                 # that has multiple outputs that requires quant attributes.
-                if self.skip_advanced_requant:
-                    if q_attrs[QCOM_DTYPE] != dq_attrs[QCOM_DTYPE]:
-                        dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
-                        user_node = list(dq_node.users)[0]
-                        n.args[0].meta.setdefault(QCOM_REQUANTIZE, {})
-                        n.args[0].meta[QCOM_REQUANTIZE][user_node.name] = dq_attrs
-                else:
-                    # When dtype is the same but other specs such as scale and offset are different,
-                    # insert requant to improve accuracy.
-                    # Users can turn this feature off if any inference speed drop is observed.
-                    if any(
-                        q_attrs[attr] != dq_attrs[attr]
-                        for attr in [
-                            QCOM_SCALE,
-                            QCOM_ZERO_POINT,
-                            QCOM_QUANT_MIN,
-                            QCOM_QUANT_MAX,
-                            QCOM_DTYPE,
-                        ]
-                    ):
-                        dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
-                        user_node = list(dq_node.users)[0]
-                        n.args[0].meta.setdefault(QCOM_REQUANTIZE, {})
-                        n.args[0].meta[QCOM_REQUANTIZE][user_node.name] = dq_attrs
+
+                if any(
+                    q_attrs[attr] != dq_attrs[attr]
+                    for attr in [
+                        QCOM_SCALE,
+                        QCOM_ZERO_POINT,
+                        QCOM_QUANT_MIN,
+                        QCOM_QUANT_MAX,
+                        QCOM_DTYPE,
+                    ]
+                ):
+                    dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING]
+                    user_node = list(dq_node.users)[0]
+                    n.args[0].meta.setdefault(QCOM_REQUANTIZE, {})
+                    n.args[0].meta[QCOM_REQUANTIZE][user_node.name] = dq_attrs
 
     # Dequant all the fold_quant parameters back to fp32.
     # If an operation is not supported by QNN and got fallback, it will expect a fp32 param.
diff --git a/examples/qualcomm/scripts/deeplab_v3.py b/examples/qualcomm/scripts/deeplab_v3.py
index 37863878e4a..cb64d904919 100755
--- a/examples/qualcomm/scripts/deeplab_v3.py
+++ b/examples/qualcomm/scripts/deeplab_v3.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 import random
 import re
@@ -74,8 +75,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_dataset(
             data_size=data_num, dataset_dir=args.artifact, download=args.download
diff --git a/examples/qualcomm/scripts/edsr.py b/examples/qualcomm/scripts/edsr.py
index a12a5069c3f..222c04ed1b1 100755
--- a/examples/qualcomm/scripts/edsr.py
+++ b/examples/qualcomm/scripts/edsr.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 import re
 from multiprocessing.connection import Client
@@ -113,8 +114,11 @@ def main(args):
         )
 
     instance = EdsrModel()
-    if args.compile_only:
+    if args.ci:
         inputs = instance.get_example_inputs()
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         dataset = get_dataset(
             args.hr_ref_dir, args.lr_dir, args.default_dataset, args.artifact
diff --git a/examples/qualcomm/scripts/inception_v3.py b/examples/qualcomm/scripts/inception_v3.py
index 5042f941d20..6cfb44adcf7 100755
--- a/examples/qualcomm/scripts/inception_v3.py
+++ b/examples/qualcomm/scripts/inception_v3.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -37,8 +38,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/inception_v4.py b/examples/qualcomm/scripts/inception_v4.py
index a82c976b2d3..92de33f8cba 100755
--- a/examples/qualcomm/scripts/inception_v4.py
+++ b/examples/qualcomm/scripts/inception_v4.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -37,8 +38,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
-        inputs = [(torch.rand(1, 3, 299, 299),)]
+    if args.ci:
+        inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/mobilenet_v2.py b/examples/qualcomm/scripts/mobilenet_v2.py
index 5cfe20f88c0..1b153431741 100755
--- a/examples/qualcomm/scripts/mobilenet_v2.py
+++ b/examples/qualcomm/scripts/mobilenet_v2.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -37,8 +38,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/mobilenet_v3.py b/examples/qualcomm/scripts/mobilenet_v3.py
index f89c4c091bb..e34125bbfca 100644
--- a/examples/qualcomm/scripts/mobilenet_v3.py
+++ b/examples/qualcomm/scripts/mobilenet_v3.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -36,8 +37,11 @@ def main(args):
         )
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/torchvision_vit.py b/examples/qualcomm/scripts/torchvision_vit.py
index 33fcd050ad3..428863daf4b 100755
--- a/examples/qualcomm/scripts/torchvision_vit.py
+++ b/examples/qualcomm/scripts/torchvision_vit.py
@@ -5,6 +5,7 @@
 # LICENSE file in the root directory of this source tree.
 
 import json
+import logging
 import os
 from multiprocessing.connection import Client
 
@@ -28,8 +29,11 @@ def main(args):
     os.makedirs(args.artifact, exist_ok=True)
 
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 3, 224, 224),)]
+        logging.warning(
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
+        )
     else:
         inputs, targets, input_list = get_imagenet_dataset(
             dataset_path=f"{args.dataset}",
diff --git a/examples/qualcomm/scripts/wav2letter.py b/examples/qualcomm/scripts/wav2letter.py
index 7f30d1865b8..e5b97a8241e 100644
--- a/examples/qualcomm/scripts/wav2letter.py
+++ b/examples/qualcomm/scripts/wav2letter.py
@@ -134,10 +134,10 @@ def main(args):
 
     # retrieve dataset, will take some time to download
     data_num = 100
-    if args.compile_only:
+    if args.ci:
         inputs = [(torch.rand(1, 1, 700, 1),)]
         logging.warning(
-            "With compile_only, accuracy will be bad due to insufficient datasets for quantization."
+            "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy."
         )
     else:
         inputs, targets, input_list = get_dataset(
diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py
index 542739a2898..63ea35e3227 100755
--- a/examples/qualcomm/utils.py
+++ b/examples/qualcomm/utils.py
@@ -585,6 +585,13 @@ def setup_common_args_and_variables():
         action="store_true",
     )
 
+    parser.add_argument(
+        "--ci",
+        help="This flag is for Continuous Integration(CI) purpose and is NOT recommended to turn on for typical use cases.",
+        action="store_true",
+        default=False,
+    )
+
     # QNN_SDK_ROOT might also be an argument, but it is used in various places.
     # So maybe it's fine to just use the environment.
     if "QNN_SDK_ROOT" not in os.environ:

From 2356e190c37695132500b1500f4919ff4457e472 Mon Sep 17 00:00:00 2001
From: winskuo-quic <quic_winskuo@quicinc.com>
Date: Tue, 13 May 2025 09:09:48 +0800
Subject: [PATCH 2/2] Code Review - State in --ci help that random inputs are used

---
 examples/qualcomm/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py
index 63ea35e3227..d8dab88e998 100755
--- a/examples/qualcomm/utils.py
+++ b/examples/qualcomm/utils.py
@@ -587,7 +587,7 @@ def setup_common_args_and_variables():
 
     parser.add_argument(
         "--ci",
-        help="This flag is for Continuous Integration(CI) purpose and is NOT recommended to turn on for typical use cases.",
+        help="This flag is for Continuous Integration(CI) purpose and is NOT recommended to turn on for typical use cases. It will use random inputs instead of real inputs.",
         action="store_true",
         default=False,
     )