Skip to content

Commit d78e371

Browse files
committed
[Backend Tester] Add test flows for QNN quantization
ghstack-source-id: 03b0180 ghstack-comment-id: 3195495418 Pull-Request: #13469
1 parent 5100c8b commit d78e371

File tree

6 files changed

+129
-25
lines changed

6 files changed

+129
-25
lines changed

.ci/scripts/test_backend_linux.sh

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,32 @@ eval "$(conda shell.bash hook)"
1818
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
1919
conda activate "${CONDA_ENV}"
2020

21-
# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
22-
source .ci/scripts/setup-vulkan-linux-deps.sh
21+
export PYTHON_EXECUTABLE=python
2322

2423
# CMake options to use, in addition to the defaults.
25-
EXTRA_BUILD_ARGS="-DEXECUTORCH_BUILD_VULKAN=ON"
24+
EXTRA_BUILD_ARGS=""
25+
26+
# The QNN test flows are registered under names starting with "qnn" (qnn, qnn_16a8w, ...),
# while the backend itself is called "qualcomm" - accept either spelling in $FLOW.
if [[ "$FLOW" == *qualcomm* || "$FLOW" == *qnn* ]]; then
27+
# Setup QNN sdk and deps - note that this is a bit hacky due to the nature of the
28+
# Qualcomm build. TODO (gjcomer) Clean this up once the QNN pybinding integration is
29+
# cleaned up.
30+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
31+
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
32+
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
33+
QNN_X86_LIB_DIR=`realpath build-x86/lib/`
34+
QNN_SDK_ROOT="/tmp/qnn/2.28.0.241029"
35+
# Prepend the x86 build output and the QNN SDK libs; ${LD_LIBRARY_PATH:-} keeps this safe under `set -u`.
export LD_LIBRARY_PATH="$QNN_X86_LIB_DIR:$QNN_SDK_ROOT/lib/x86_64-linux-clang/:${LD_LIBRARY_PATH:-}"
36+
37+
# TODO Get SDK root from install scripts
38+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT"
39+
fi
40+
41+
if [[ "$FLOW" == *vulkan* ]]; then
42+
# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
43+
source .ci/scripts/setup-vulkan-linux-deps.sh
44+
45+
EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_VULKAN=ON"
46+
fi
2647

2748
# We need the runner to test the built library.
2849
PYTHON_EXECUTABLE=python CMAKE_ARGS="$EXTRA_BUILD_ARGS" .ci/scripts/setup-linux.sh --build-tool cmake --build-mode Release --editable true

.github/workflows/nightly.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ jobs:
4242
strategy:
4343
fail-fast: false
4444
matrix:
45-
flow: [vulkan, xnnpack, xnnpack_static_int8_per_channel]
45+
# Flow names must match the TestFlow names registered in backends/test/suite/flows (the QNN flows are named qnn*).
flow: [qnn, qnn_16a16w, qnn_16a8w, qnn_16a4w, qnn_16a4w_block, qnn_8a8w, vulkan, xnnpack, xnnpack_static_int8_per_channel]
4646
suite: [models, operators]
4747
with:
4848
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

backends/qualcomm/scripts/install_qnn_sdk.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ source "${SCRIPT_DIR}/qnn_config.sh"
99
# Function to install Android NDK (only if not already set)
1010
setup_android_ndk() {
1111
# Check if ANDROID_NDK_ROOT is already set and valid
12-
if [ -n "${ANDROID_NDK_ROOT}" ] && [ -d "${ANDROID_NDK_ROOT}" ]; then
12+
if [ -n "${ANDROID_NDK_ROOT:-}" ] && [ -d "${ANDROID_NDK_ROOT:-}" ]; then
1313
echo "Android NDK already set to ${ANDROID_NDK_ROOT} - skipping installation"
1414
return
1515
fi
@@ -41,7 +41,7 @@ verify_pkg_installed() {
4141

4242
install_qnn() {
4343
# Check if QNN_SDK_ROOT is already set and valid
44-
if [ -n "${QNN_SDK_ROOT}" ] && [ -d "${QNN_SDK_ROOT}" ]; then
44+
if [ -n "${QNN_SDK_ROOT:-}" ] && [ -d "${QNN_SDK_ROOT:-}" ]; then
4545
echo "QNN SDK already set to ${QNN_SDK_ROOT} - skipping installation"
4646
return
4747
fi
@@ -141,9 +141,9 @@ setup_libcpp() {
141141
popd >/dev/null
142142

143143
# Set environment variables
144-
export CPLUS_INCLUDE_PATH="${INSTALL_DIR}/include:$CPLUS_INCLUDE_PATH"
145-
export LD_LIBRARY_PATH="${INSTALL_DIR}/lib:$LD_LIBRARY_PATH"
146-
export LIBRARY_PATH="${INSTALL_DIR}/lib:$LIBRARY_PATH"
144+
export CPLUS_INCLUDE_PATH="${INSTALL_DIR}/include:${CPLUS_INCLUDE_PATH:-}"
145+
export LD_LIBRARY_PATH="${INSTALL_DIR}/lib:${LD_LIBRARY_PATH:-}"
146+
export LIBRARY_PATH="${INSTALL_DIR}/lib:${LIBRARY_PATH:-}"
147147

148148
echo "libc++ installed to ${INSTALL_DIR}"
149149
}

backends/qualcomm/tests/tester.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@
44
# This source code is licensed under the BSD-style license found in the
55
# LICENSE file in the root directory of this source tree.
66

7-
from typing import Any, List, Optional, Tuple
7+
from typing import Any, List, Optional, Sequence, Tuple
88

99
import executorch
1010
import executorch.backends.test.harness.stages as BaseStages
1111

1212
import torch
1313
from executorch.backends.qualcomm._passes.qnn_pass_manager import QnnPassManager
1414
from executorch.backends.qualcomm.partition.qnn_partitioner import QnnPartitioner
15+
from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer
1516
from executorch.backends.qualcomm.utils.utils import (
1617
generate_htp_compiler_spec,
1718
generate_qnn_executorch_compiler_spec,
@@ -21,9 +22,32 @@
2122
from executorch.backends.test.harness.stages import StageType
2223
from executorch.exir import EdgeCompileConfig, to_edge_transform_and_lower
2324
from executorch.exir.backend.partitioner import Partitioner
25+
from torch.ao.quantization.quantize_pt2e import (
26+
convert_pt2e,
27+
prepare_pt2e,
28+
prepare_qat_pt2e,
29+
)
2430
from torch.export import ExportedProgram
2531

2632

33+
class Quantize(BaseStages.Quantize):
34+
def __init__(
35+
self,
36+
quantizer: QnnQuantizer,
37+
quantization_config: Optional[Any] = None,
38+
calibrate: bool = True,
39+
calibration_samples: Optional[Sequence[Any]] = None,
40+
is_qat: Optional[bool] = False,
41+
):
42+
super().__init__(
43+
quantizer=quantizer,
44+
calibrate=calibrate,
45+
calibration_samples=calibration_samples,
46+
is_qat=is_qat,
47+
set_global=False,
48+
)
49+
50+
2751
class Partition(BaseStages.Partition):
2852
def __init__(self, partitioner: Optional[Partitioner] = None):
2953
super().__init__(
@@ -37,8 +61,9 @@ def __init__(
3761
partitioners: Optional[List[Partitioner]] = None,
3862
edge_compile_config: Optional[EdgeCompileConfig] = None,
3963
soc_model: str = "SM8650",
64+
use_fp16: bool = True,
4065
):
41-
backend_options = generate_htp_compiler_spec(use_fp16=True)
66+
backend_options = generate_htp_compiler_spec(use_fp16=use_fp16)
4267
self.chipset = get_soc_to_chipset_map()[soc_model]
4368
self.compiler_specs = generate_qnn_executorch_compiler_spec(
4469
soc_model=self.chipset,
@@ -73,15 +98,17 @@ def __init__(
7398
module: torch.nn.Module,
7499
example_inputs: Tuple[torch.Tensor],
75100
dynamic_shapes: Optional[Tuple[Any]] = None,
101+
use_fp16: bool = True,
76102
):
103+
def create_to_edge_transform_and_lower(*args, **kwargs):
104+
kwargs["use_fp16"] = use_fp16
105+
return ToEdgeTransformAndLower(*args, **kwargs)
106+
77107
# Specialize for Qualcomm
78-
stage_classes = (
79-
executorch.backends.test.harness.Tester.default_stage_classes()
80-
| {
81-
StageType.PARTITION: Partition,
82-
StageType.TO_EDGE_TRANSFORM_AND_LOWER: ToEdgeTransformAndLower,
83-
}
84-
)
108+
stage_classes = executorch.backends.test.harness.Tester.default_stage_classes() | {
109+
StageType.PARTITION: Partition,
110+
StageType.TO_EDGE_TRANSFORM_AND_LOWER: create_to_edge_transform_and_lower,
111+
}
85112

86113
super().__init__(
87114
module=module,

backends/test/suite/flow.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,24 @@ def all_flows() -> dict[str, TestFlow]:
8181
logger.info(f"Skipping Vulkan flow registration: {e}")
8282

8383
try:
84-
from executorch.backends.test.suite.flows.qualcomm import QUALCOMM_TEST_FLOW
84+
from executorch.backends.test.suite.flows.qualcomm import (
85+
QNN_16A16W_TEST_FLOW,
86+
QNN_16A4W_BLOCK_TEST_FLOW,
87+
QNN_16A4W_TEST_FLOW,
88+
QNN_16A8W_TEST_FLOW,
89+
QNN_8A8W_TEST_FLOW,
90+
QNN_TEST_FLOW,
91+
)
8592

8693
flows += [
87-
QUALCOMM_TEST_FLOW,
94+
QNN_TEST_FLOW,
95+
QNN_16A16W_TEST_FLOW,
96+
QNN_16A8W_TEST_FLOW,
97+
QNN_16A4W_TEST_FLOW,
98+
QNN_16A4W_BLOCK_TEST_FLOW,
99+
QNN_8A8W_TEST_FLOW,
88100
]
89101
except Exception as e:
90-
logger.info(f"Skipping Qualcomm flow registration: {e}")
102+
logger.info(f"Skipping QNN flow registration: {e}")
91103

92104
return {f.name: f for f in flows if f is not None}

backends/test/suite/flows/qualcomm.py

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,61 @@
1-
from executorch.backends.qualcomm.tests.tester import QualcommTester
1+
from executorch.backends.qualcomm.quantizer.quantizer import QnnQuantizer, QuantDtype
2+
from executorch.backends.qualcomm.tests.tester import QualcommTester, Quantize
23
from executorch.backends.test.suite.flow import TestFlow
4+
from torchao.quantization.pt2e import MovingAverageMinMaxObserver
35

46

5-
def _create_qualcomm_flow(
7+
def _create_qnn_flow(
68
name: str,
79
quantize: bool = False,
10+
quant_dtype: QuantDtype | None = None,
11+
per_channel_conv=True,
12+
per_channel_linear=False,
13+
is_qat=False,
14+
use_fp16=True,
815
) -> TestFlow:
16+
if quantize and quant_dtype is None:
17+
raise RuntimeError("Quant dtype must be provided when quantize is true.")
18+
19+
def create_tester(*args, **kwargs) -> QualcommTester:
    """Build a QualcommTester with this flow's fp16 setting applied.

    All positional and keyword arguments are forwarded unchanged, except
    that ``use_fp16`` is overridden with the value captured from the
    enclosing flow factory.
    """
    # Pass the bare bool. Wrapping it in a 1-tuple -- ``(use_fp16,)`` -- gives
    # an object that is always truthy, so ``use_fp16=False`` would be lost in
    # any truthiness check downstream.
    kwargs["use_fp16"] = use_fp16
    return QualcommTester(*args, **kwargs)
22+
23+
def create_quantize_stage() -> Quantize:
24+
quantizer = QnnQuantizer()
25+
quantizer.set_default_quant_config(
26+
quant_dtype,
27+
is_qat=is_qat,
28+
is_conv_per_channel=per_channel_conv,
29+
is_linear_per_channel=per_channel_linear,
30+
act_observer=MovingAverageMinMaxObserver,
31+
)
32+
return Quantize(quantizer=quantizer)
33+
934
return TestFlow(
1035
name,
1136
backend="qualcomm",
12-
tester_factory=QualcommTester,
37+
tester_factory=create_tester,
1338
quantize=quantize,
39+
quantize_stage_factory=create_quantize_stage if quantize else None,
1440
)
1541

1642

17-
QUALCOMM_TEST_FLOW = _create_qualcomm_flow("qualcomm")
43+
QNN_TEST_FLOW = _create_qnn_flow("qnn")
44+
# 16-bit activation / 16-bit weight flow: the quant dtype has to match the flow
# name, otherwise this flow silently re-runs the 8a8w configuration.
QNN_16A16W_TEST_FLOW = _create_qnn_flow(
    "qnn_16a16w", quantize=True, quant_dtype=QuantDtype.use_16a16w, use_fp16=False
)
47+
QNN_16A8W_TEST_FLOW = _create_qnn_flow(
48+
"qnn_16a8w", quantize=True, quant_dtype=QuantDtype.use_16a8w, use_fp16=False
49+
)
50+
QNN_16A4W_TEST_FLOW = _create_qnn_flow(
51+
"qnn_16a4w", quantize=True, quant_dtype=QuantDtype.use_16a4w, use_fp16=False
52+
)
53+
# 16-bit activation / 4-bit block-wise weight flow. Use the block-wise dtype so
# this flow does not duplicate the 8a8w one.
# NOTE(review): block-wise quantization presumably also needs a block size
# configured on the quantizer - confirm against QnnQuantizer before relying on
# this flow's results.
QNN_16A4W_BLOCK_TEST_FLOW = _create_qnn_flow(
    "qnn_16a4w_block",
    quantize=True,
    quant_dtype=QuantDtype.use_16a4w_block,
    use_fp16=False,
)
59+
QNN_8A8W_TEST_FLOW = _create_qnn_flow(
60+
"qnn_8a8w", quantize=True, quant_dtype=QuantDtype.use_8a8w, use_fp16=False
61+
)

0 commit comments

Comments
 (0)