
Commit d452e60

[Backend Tester] Add test flows for QNN quantization
ghstack-source-id: 0b94375
ghstack-comment-id: 3195495418
Pull-Request: #13469
1 parent 787f5ed commit d452e60

File tree

4 files changed (+96, -13 lines)


.github/workflows/nightly.yml

Lines changed: 1 addition & 1 deletion
@@ -42,7 +42,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        flow: [vulkan, xnnpack, xnnpack_static_int8_per_channel]
+        flow: [qualcomm, qualcomm_16a16w, qualcomm_16a8w, qualcomm_16a4w, qualcomm_16a4w_block, qualcomm_8a8w, vulkan, xnnpack, xnnpack_static_int8_per_channel]
       suite: [models, operators]
     with:
       ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
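The new matrix entries name Qualcomm test flows registered by the suite. As a quick sanity check, a minimal sketch, assuming the dict returned by all_flows() is keyed by flow name (the check itself is illustrative and not part of this commit):

from executorch.backends.test.suite.flow import all_flows

# Each nightly matrix entry (e.g. "qualcomm_16a4w_block") is expected to match the
# name of a registered TestFlow. Registration only happens when the Qualcomm backend
# imports successfully; otherwise the flow is skipped with a log message.
flows = all_flows()
assert "qualcomm_16a4w_block" in flows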

backends/qualcomm/tests/tester.py

Lines changed: 36 additions & 9 deletions
@@ -4,14 +4,15 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-from typing import Any, List, Optional, Tuple
+from typing import Any, List, Optional, Sequence, Tuple
 
 import executorch
 import executorch.backends.test.harness.stages as BaseStages
 
 import torch
 from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager
 from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner
+from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
 from executorch.backends.qualcomm.utils.utils import (
     generate_htp_compiler_spec,
     generate_qnn_executorch_compiler_spec,
@@ -21,9 +22,32 @@
 from executorch.backends.test.harness.stages import StageType
 from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
 from executorch.exir.backend.partitioner import Partitioner
+from torch.ao.quantization.quantize_pt2e import (
+    convert_pt2e,
+    prepare_pt2e,
+    prepare_qat_pt2e,
+)
 from torch.export import ExportedProgram
 
 
+class Quantize(BaseStages.Quantize):
+    def __init__(
+        self,
+        quantizer: QnnQuantizer,
+        quantization_config: Optional[Any] = None,
+        calibrate: bool = True,
+        calibration_samples: Optional[Sequence[Any]] = None,
+        is_qat: Optional[bool] = False,
+    ):
+        super().__init__(
+            quantizer=quantizer,
+            calibrate=calibrate,
+            calibration_samples=calibration_samples,
+            is_qat=is_qat,
+            set_global=False,
+        )
+
+
 class Partition(BaseStages.Partition):
     def __init__(self, partitioner: Optional[Partitioner] = None):
         super().__init__(
@@ -37,8 +61,9 @@ def __init__(
         partitioners: Optional[List[Partitioner]] = None,
         edge_compile_config: Optional[EdgeCompileConfig] = None,
         soc_model: str = "SM8650",
+        use_fp16: bool = True,
     ):
-        backend_options = generate_htp_compiler_spec(use_fp16=True)
+        backend_options = generate_htp_compiler_spec(use_fp16=use_fp16)
         self.chipset = get_soc_to_chipset_map()[soc_model]
         self.compiler_specs = generate_qnn_executorch_compiler_spec(
             soc_model=self.chipset,
@@ -73,15 +98,17 @@ def __init__(
         module: torch.nn.Module,
         example_inputs: Tuple[torch.Tensor],
         dynamic_shapes: Optional[Tuple[Any]] = None,
+        use_fp16: bool = True,
     ):
+        def create_to_edge_transform_and_lower(*args, **kwargs):
+            kwargs["use_fp16"] = use_fp16
+            return ToEdgeTransformAndLower(*args, **kwargs)
+
         # Specialize for Qualcomm
-        stage_classes = (
-            executorch.backends.test.harness.Tester.default_stage_classes()
-            | {
-                StageType.PARTITION: Partition,
-                StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
-            }
-        )
+        stage_classes = executorch.backends.test.harness.Tester.default_stage_classes() | {
+            StageType.PARTITION: Partition,
+            StageType.TO_EDGE_TRANSFORM_AND_LOWER: create_to_edge_transform_and_lower,
+        }
 
         super().__init__(
             module=module,
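For context, a minimal sketch of how the new use_fp16 flag and Quantize stage could be exercised directly. The SmallModel module is an illustrative assumption, and the quantizer configuration mirrors create_quantize_stage() in backends/test/suite/flows/qualcomm.py below; this is not code from the commit itself.

import torch

from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer, QuantDtype
from executorch.backends.qualcomm.tests.tester import QualcommTester, Quantize
from torchao.quantization.pt2e import MovingAverageMinMaxObserver


# Illustrative toy module (not part of this commit).
class SmallModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(8, 4)

    def forward(self, x):
        return torch.relu(self.linear(x))


# Quantized flows run the HTP backend without fp16, via the new use_fp16 flag.
tester = QualcommTester(SmallModel(), (torch.randn(1, 8),), use_fp16=False)

# Configure a QnnQuantizer and wrap it in the new Quantize stage, which forwards
# to the harness base stage with set_global=False.
quantizer = QnnQuantizer()
quantizer.set_default_quant_config(
    QuantDtype.use_8a8w,
    is_qat=False,
    is_conv_per_channel=True,
    is_linear_per_channel=False,
    act_observer=MovingAverageMinMaxObserver,
)
quantize_stage = Quantize(quantizer=quantizer)

In the test suite this wiring is done by the flow definitions in the files below rather than by hand.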

backends/test/suite/flow.py

Lines changed: 13 additions & 1 deletion
@@ -81,10 +81,22 @@ def all_flows() -> dict[str, TestFlow]:
         logger.info(f"Skipping Vulkan flow registration: {e}")
 
     try:
-        from executorch.backends.test.suite.flows.qualcomm import QUALCOMM_TEST_FLOW
+        from executorch.backends.test.suite.flows.qualcomm import (
+            QUALCOMM_16A16W_TEST_FLOW,
+            QUALCOMM_16A4W_BLOCK_TEST_FLOW,
+            QUALCOMM_16A4W_TEST_FLOW,
+            QUALCOMM_16A8W_TEST_FLOW,
+            QUALCOMM_8A8W_TEST_FLOW,
+            QUALCOMM_TEST_FLOW,
+        )
 
         flows += [
             QUALCOMM_TEST_FLOW,
+            QUALCOMM_16A16W_TEST_FLOW,
+            QUALCOMM_16A8W_TEST_FLOW,
+            QUALCOMM_16A4W_TEST_FLOW,
+            QUALCOMM_16A4W_BLOCK_TEST_FLOW,
+            QUALCOMM_8A8W_TEST_FLOW,
         ]
     except Exception as e:
         logger.info(f"Skipping Qualcomm flow registration: {e}")

backends/test/suite/flows/qualcomm.py

Lines changed: 46 additions & 2 deletions
@@ -1,17 +1,61 @@
-from executorch.backends.qualcomm.tests.tester import QualcommTester
+from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer, QuantDtype
+from executorch.backends.qualcomm.tests.tester import QualcommTester, Quantize
 from executorch.backends.test.suite.flow import TestFlow
+from torchao.quantization.pt2e import MovingAverageMinMaxObserver
 
 
 def _create_qualcomm_flow(
     name: str,
     quantize: bool = False,
+    quant_dtype: QuantDtype | None = None,
+    per_channel_conv=True,
+    per_channel_linear=False,
+    is_qat=False,
+    use_fp16=True,
 ) -> TestFlow:
+    if quantize and quant_dtype is None:
+        raise RuntimeError("Quant dtype must be provided when quantize is true.")
+
+    def create_tester(*args, **kwargs) -> QualcommTester:
+        kwargs["use_fp16"] = use_fp16
+        return QualcommTester(*args, **kwargs)
+
+    def create_quantize_stage() -> Quantize:
+        quantizer = QnnQuantizer()
+        quantizer.set_default_quant_config(
+            quant_dtype,
+            is_qat=is_qat,
+            is_conv_per_channel=per_channel_conv,
+            is_linear_per_channel=per_channel_linear,
+            act_observer=MovingAverageMinMaxObserver,
+        )
+        return Quantize(quantizer=quantizer)
+
     return TestFlow(
         name,
         backend="qualcomm",
-        tester_factory=QualcommTester,
+        tester_factory=create_tester,
         quantize=quantize,
+        quantize_stage_factory=create_quantize_stage if quantize else None,
     )
 
 
 QUALCOMM_TEST_FLOW = _create_qualcomm_flow("qualcomm")
+QUALCOMM_16A16W_TEST_FLOW = _create_qualcomm_flow(
+    "qualcomm_16a16w", quantize=True, quant_dtype=QuantDtype.use_16a16w, use_fp16=False
+)
+QUALCOMM_16A8W_TEST_FLOW = _create_qualcomm_flow(
+    "qualcomm_16a8w", quantize=True, quant_dtype=QuantDtype.use_16a8w, use_fp16=False
+)
+QUALCOMM_16A4W_TEST_FLOW = _create_qualcomm_flow(
+    "qualcomm_16a4w", quantize=True, quant_dtype=QuantDtype.use_16a4w, use_fp16=False
+)
+QUALCOMM_16A4W_BLOCK_TEST_FLOW = _create_qualcomm_flow(
+    "qualcomm_16a4w_block",
+    quantize=True,
+    quant_dtype=QuantDtype.use_16a4w_block,
+    use_fp16=False,
+)
+QUALCOMM_8A8W_TEST_FLOW = _create_qualcomm_flow(
+    "qualcomm_8a8w", quantize=True, quant_dtype=QuantDtype.use_8a8w, use_fp16=False
+)
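None of the flows above toggle the helper's per-channel or QAT knobs. As a hedged sketch of what an additional variant could look like with the same helper (the qualcomm_16a8w_per_channel_linear name and the flow itself are hypothetical, not part of this commit):

# Hypothetical extra flow, shown only to illustrate _create_qualcomm_flow's knobs;
# it is not defined or registered anywhere in this commit.
QUALCOMM_16A8W_PER_CHANNEL_LINEAR_TEST_FLOW = _create_qualcomm_flow(
    "qualcomm_16a8w_per_channel_linear",
    quantize=True,
    quant_dtype=QuantDtype.use_16a8w,
    per_channel_linear=True,
    use_fp16=False,
)

A flow declared this way would also need to be imported in backends/test/suite/flow.py and added to the nightly matrix to run in CI.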
