
Commit 8b77007

xieofxie and hualxie authored
add op_types_to_quantize to get_qnn_qdq_config (microsoft#23458)

### Description
Add op_types_to_quantize to get_qnn_qdq_config so that only part of the model can be quantized. In my understanding, ops that are not quantized will run in fp16 if supported: https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/SupportedOps.html

### Motivation and Context
See the scenario in the Olive issue microsoft/Olive#1552

---------

Co-authored-by: hualxie <[email protected]>
1 parent 3701e3e commit 8b77007

File tree

  • onnxruntime/python/tools/quantization/execution_providers/qnn

1 file changed: +9 −1

onnxruntime/python/tools/quantization/execution_providers/qnn/quant_config.py

Lines changed: 9 additions & 1 deletion
@@ -54,6 +54,7 @@ def get_qnn_qdq_config(
     keep_removable_activations: bool = False,
     stride: int | None = None,
     calibration_providers: list[str] | None = None,
+    op_types_to_quantize: list[str] | None = None,
 ) -> StaticQuantConfig:
     """
     Returns a static quantization configuration suitable for running QDQ models on QNN EP.
@@ -120,6 +121,7 @@ def get_qnn_qdq_config(
             QuantizeLinear/DequantizeLinear operators from the model.
         calibration_providers: Execution providers to run the session during calibration. Default is None which uses
             [ "CPUExecutionProvider" ].
+        op_types_to_quantize: If set to None, all operator types will be quantized except for OP_TYPES_TO_EXCLUDE

     Returns:
         A StaticQuantConfig object
@@ -164,7 +166,11 @@ def get_qnn_qdq_config(
         name_to_initializer,
     )

+    op_types_to_quantize_set = set(op_types_to_quantize) if op_types_to_quantize else None
+
     for node in model.graph.node:
+        if op_types_to_quantize_set and node.op_type not in op_types_to_quantize_set:
+            continue
         op_types.add(node.op_type)
         qnn_compat.process_node(node)

@@ -192,7 +198,9 @@ def get_qnn_qdq_config(
         calibrate_method=calibrate_method,
         activation_type=activation_type,
         weight_type=weight_type,
-        op_types_to_quantize=list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
+        op_types_to_quantize=op_types_to_quantize
+        if op_types_to_quantize
+        else list(op_types.difference(OP_TYPES_TO_EXCLUDE)),
         per_channel=per_channel,
         use_external_data_format=(model_has_external_data or model.ByteSize() >= MODEL_SIZE_THRESHOLD),
         calibration_providers=calibration_providers,
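The selection logic this diff adds can be sketched in isolation. This is a hedged illustration, not the real quant_config.py code: Node is a hypothetical stand-in for ONNX's NodeProto (only its op_type attribute matters here), and the OP_TYPES_TO_EXCLUDE set shown is purely illustrative.

```python
# Sketch of the op-type selection the diff introduces.
# Node is a hypothetical stand-in for onnx.NodeProto; the real exclusion
# set lives in quant_config.py and differs from this illustrative one.
from dataclasses import dataclass


@dataclass
class Node:
    op_type: str


OP_TYPES_TO_EXCLUDE = {"Cast"}  # illustrative only


def select_op_types_to_quantize(nodes, op_types_to_quantize=None):
    """Mirror the diff: if an explicit allow-list is given, skip all other
    nodes and pass the list through; otherwise collect every op type seen
    in the graph minus OP_TYPES_TO_EXCLUDE."""
    allow = set(op_types_to_quantize) if op_types_to_quantize else None
    op_types = set()
    for node in nodes:
        if allow and node.op_type not in allow:
            continue  # left unquantized; on QNN EP such ops may run in fp16
        op_types.add(node.op_type)
    return (
        op_types_to_quantize
        if op_types_to_quantize
        else list(op_types.difference(OP_TYPES_TO_EXCLUDE))
    )


nodes = [Node("Conv"), Node("Relu"), Node("Cast"), Node("MatMul")]
print(sorted(select_op_types_to_quantize(nodes)))               # default: everything minus excluded
print(select_op_types_to_quantize(nodes, ["Conv", "MatMul"]))   # allow-list passed through unchanged
```

Note the default path is unchanged behavior (all observed op types minus the exclusion set); the new parameter only narrows the selection when explicitly provided, which is what keeps the change backward compatible.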
