From b29d2ec707601a041d8b3bc8220948e5c48d25be Mon Sep 17 00:00:00 2001 From: winskuo-quic Date: Mon, 24 Mar 2025 16:28:02 +0800 Subject: [PATCH 1/2] Qualcomm AI Engine Direct - Flags for CI --- .ci/scripts/test_model.sh | 2 +- .../qualcomm/_passes/annotate_quant_attrs.py | 46 +++++++------------ examples/qualcomm/scripts/deeplab_v3.py | 6 ++- examples/qualcomm/scripts/edsr.py | 6 ++- examples/qualcomm/scripts/inception_v3.py | 6 ++- examples/qualcomm/scripts/inception_v4.py | 8 +++- examples/qualcomm/scripts/mobilenet_v2.py | 6 ++- examples/qualcomm/scripts/mobilenet_v3.py | 6 ++- examples/qualcomm/scripts/torchvision_vit.py | 6 ++- examples/qualcomm/scripts/wav2letter.py | 4 +- examples/qualcomm/utils.py | 7 +++ 11 files changed, 63 insertions(+), 40 deletions(-) diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh index fa922e897d7..fa40f8343be 100755 --- a/.ci/scripts/test_model.sh +++ b/.ci/scripts/test_model.sh @@ -205,7 +205,7 @@ test_model_with_qnn() { # TODO(guangyang): Make QNN chipset matches the target device QNN_CHIPSET=SM8450 - "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --compile_only $EXTRA_FLAGS + "${PYTHON_EXECUTABLE}" -m examples.qualcomm.scripts.${EXPORT_SCRIPT} -b ${CMAKE_OUTPUT_DIR} -m ${QNN_CHIPSET} --ci --compile_only $EXTRA_FLAGS EXPORTED_MODEL=$(find "./${EXPORT_SCRIPT}" -type f -name "${MODEL_NAME}*.pte" -print -quit) } diff --git a/backends/qualcomm/_passes/annotate_quant_attrs.py b/backends/qualcomm/_passes/annotate_quant_attrs.py index ed19a54b7e7..b4f14fc28cd 100644 --- a/backends/qualcomm/_passes/annotate_quant_attrs.py +++ b/backends/qualcomm/_passes/annotate_quant_attrs.py @@ -31,15 +31,12 @@ class AnnotateQuantAttrs(ExportPass): """ Add "quant_attrs" to graph nodes' meta from the QDQ information - generated after quatization process. + generated after quantization process. """ - def __init__( - self, edge_program: torch.export.ExportedProgram, skip_advanced_requat: bool - ): + def __init__(self, edge_program: torch.export.ExportedProgram): super(AnnotateQuantAttrs, self).__init__() self.edge_program = edge_program - self.skip_advanced_requant = skip_advanced_requat def _annotate_source_nodes( self, quant_node: torch.fx.Node, quant_attrs: Dict[str, Any] @@ -88,30 +85,21 @@ def _annotate_requant(self, n): dq_attrs = get_quant_attrs(self.edge_program, dq_node) # TODO: Store multiple pairs of requantize attributes when we have an op builder # that has multiple outputs that requires quant attributes. - if self.skip_advanced_requant: - if q_attrs[QCOM_DTYPE] != dq_attrs[QCOM_DTYPE]: - dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING] - user_node = list(dq_node.users)[0] - n.args[0].meta.setdefault(QCOM_REQUANTIZE, {}) - n.args[0].meta[QCOM_REQUANTIZE][user_node.name] = dq_attrs - else: - # When dtype is the same but other specs such as scale and offset are different, - # insert requant to improve accuracy. - # Users can turn this feature off if any inference speed drop is observed. - if any( - q_attrs[attr] != dq_attrs[attr] - for attr in [ - QCOM_SCALE, - QCOM_ZERO_POINT, - QCOM_QUANT_MIN, - QCOM_QUANT_MAX, - QCOM_DTYPE, - ] - ): - dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING] - user_node = list(dq_node.users)[0] - n.args[0].meta.setdefault(QCOM_REQUANTIZE, {}) - n.args[0].meta[QCOM_REQUANTIZE][user_node.name] = dq_attrs + + if any( + q_attrs[attr] != dq_attrs[attr] + for attr in [ + QCOM_SCALE, + QCOM_ZERO_POINT, + QCOM_QUANT_MIN, + QCOM_QUANT_MAX, + QCOM_DTYPE, + ] + ): + dq_attrs[QCOM_ENCODING] = q_attrs[QCOM_ENCODING] + user_node = list(dq_node.users)[0] + n.args[0].meta.setdefault(QCOM_REQUANTIZE, {}) + n.args[0].meta[QCOM_REQUANTIZE][user_node.name] = dq_attrs # Dequant all the fold_quant parameters back to fp32. # If an operation is not supported by QNN and got fallback, it will expect a fp32 param. diff --git a/examples/qualcomm/scripts/deeplab_v3.py b/examples/qualcomm/scripts/deeplab_v3.py index 37863878e4a..cb64d904919 100755 --- a/examples/qualcomm/scripts/deeplab_v3.py +++ b/examples/qualcomm/scripts/deeplab_v3.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import json +import logging import os import random import re @@ -74,8 +75,11 @@ def main(args): ) data_num = 100 - if args.compile_only: + if args.ci: inputs = [(torch.rand(1, 3, 224, 224),)] + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." + ) else: inputs, targets, input_list = get_dataset( data_size=data_num, dataset_dir=args.artifact, download=args.download diff --git a/examples/qualcomm/scripts/edsr.py b/examples/qualcomm/scripts/edsr.py index a12a5069c3f..222c04ed1b1 100755 --- a/examples/qualcomm/scripts/edsr.py +++ b/examples/qualcomm/scripts/edsr.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import json +import logging import os import re from multiprocessing.connection import Client @@ -113,8 +114,11 @@ def main(args): ) instance = EdsrModel() - if args.compile_only: + if args.ci: inputs = instance.get_example_inputs() + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." + ) else: dataset = get_dataset( args.hr_ref_dir, args.lr_dir, args.default_dataset, args.artifact diff --git a/examples/qualcomm/scripts/inception_v3.py b/examples/qualcomm/scripts/inception_v3.py index 5042f941d20..6cfb44adcf7 100755 --- a/examples/qualcomm/scripts/inception_v3.py +++ b/examples/qualcomm/scripts/inception_v3.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import json +import logging import os from multiprocessing.connection import Client @@ -37,8 +38,11 @@ def main(args): ) data_num = 100 - if args.compile_only: + if args.ci: inputs = [(torch.rand(1, 3, 224, 224),)] + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." + ) else: inputs, targets, input_list = get_imagenet_dataset( dataset_path=f"{args.dataset}", diff --git a/examples/qualcomm/scripts/inception_v4.py b/examples/qualcomm/scripts/inception_v4.py index a82c976b2d3..92de33f8cba 100755 --- a/examples/qualcomm/scripts/inception_v4.py +++ b/examples/qualcomm/scripts/inception_v4.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import json +import logging import os from multiprocessing.connection import Client @@ -37,8 +38,11 @@ def main(args): ) data_num = 100 - if args.compile_only: - inputs = [(torch.rand(1, 3, 299, 299),)] + if args.ci: + inputs = [(torch.rand(1, 3, 224, 224),)] + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." + ) else: inputs, targets, input_list = get_imagenet_dataset( dataset_path=f"{args.dataset}", diff --git a/examples/qualcomm/scripts/mobilenet_v2.py b/examples/qualcomm/scripts/mobilenet_v2.py index 5cfe20f88c0..1b153431741 100755 --- a/examples/qualcomm/scripts/mobilenet_v2.py +++ b/examples/qualcomm/scripts/mobilenet_v2.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import json +import logging import os from multiprocessing.connection import Client @@ -37,8 +38,11 @@ def main(args): ) data_num = 100 - if args.compile_only: + if args.ci: inputs = [(torch.rand(1, 3, 224, 224),)] + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." + ) else: inputs, targets, input_list = get_imagenet_dataset( dataset_path=f"{args.dataset}", diff --git a/examples/qualcomm/scripts/mobilenet_v3.py b/examples/qualcomm/scripts/mobilenet_v3.py index f89c4c091bb..e34125bbfca 100644 --- a/examples/qualcomm/scripts/mobilenet_v3.py +++ b/examples/qualcomm/scripts/mobilenet_v3.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import json +import logging import os from multiprocessing.connection import Client @@ -36,8 +37,11 @@ def main(args): ) data_num = 100 - if args.compile_only: + if args.ci: inputs = [(torch.rand(1, 3, 224, 224),)] + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." + ) else: inputs, targets, input_list = get_imagenet_dataset( dataset_path=f"{args.dataset}", diff --git a/examples/qualcomm/scripts/torchvision_vit.py b/examples/qualcomm/scripts/torchvision_vit.py index 33fcd050ad3..428863daf4b 100755 --- a/examples/qualcomm/scripts/torchvision_vit.py +++ b/examples/qualcomm/scripts/torchvision_vit.py @@ -5,6 +5,7 @@ # LICENSE file in the root directory of this source tree. import json +import logging import os from multiprocessing.connection import Client @@ -28,8 +29,11 @@ def main(args): os.makedirs(args.artifact, exist_ok=True) data_num = 100 - if args.compile_only: + if args.ci: inputs = [(torch.rand(1, 3, 224, 224),)] + logging.warning( + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." + ) else: inputs, targets, input_list = get_imagenet_dataset( dataset_path=f"{args.dataset}", diff --git a/examples/qualcomm/scripts/wav2letter.py b/examples/qualcomm/scripts/wav2letter.py index 7f30d1865b8..e5b97a8241e 100644 --- a/examples/qualcomm/scripts/wav2letter.py +++ b/examples/qualcomm/scripts/wav2letter.py @@ -134,10 +134,10 @@ def main(args): # retrieve dataset, will take some time to download data_num = 100 - if args.compile_only: + if args.ci: inputs = [(torch.rand(1, 1, 700, 1),)] logging.warning( - "With compile_only, accuracy will be bad due to insufficient datasets for quantization." + "This option is for CI to verify the export flow. It uses random input and will result in poor accuracy." ) else: inputs, targets, input_list = get_dataset( diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index 542739a2898..63ea35e3227 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -585,6 +585,13 @@ def setup_common_args_and_variables(): action="store_true", ) + parser.add_argument( + "--ci", + help="This flag is for Continuous Integration(CI) purpose and is NOT recommended to turn on for typical use cases.", + action="store_true", + default=False, + ) + # QNN_SDK_ROOT might also be an argument, but it is used in various places. # So maybe it's fine to just use the environment. if "QNN_SDK_ROOT" not in os.environ: From 2356e190c37695132500b1500f4919ff4457e472 Mon Sep 17 00:00:00 2001 From: winskuo-quic Date: Tue, 13 May 2025 09:09:48 +0800 Subject: [PATCH 2/2] Code Review --- examples/qualcomm/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/qualcomm/utils.py b/examples/qualcomm/utils.py index 63ea35e3227..d8dab88e998 100755 --- a/examples/qualcomm/utils.py +++ b/examples/qualcomm/utils.py @@ -587,7 +587,7 @@ def setup_common_args_and_variables(): parser.add_argument( "--ci", - help="This flag is for Continuous Integration(CI) purpose and is NOT recommended to turn on for typical use cases.", + help="This flag is for Continuous Integration(CI) purpose and is NOT recommended to turn on for typical use cases. It will use random inputs instead of real inputs.", action="store_true", default=False, )