Skip to content

Commit f50db77

Browse files
DannyYuyang-quic authored and kirklandsign committed
Qualcomm AI Engine Direct - Support tile op for different I/O rank (#10054)
Summary:
- Support the tile op when the rank of the input tensor is less than the rank of the output tensor.
- Align the make_quantizer kwargs.
- Remove module.eval(), since calling eval() is not supported for exported models.

### Test plan
```bash
python -m backends.qualcomm.tests.test_qnn_delegate TestQNNQuantizedOperator.test_qnn_backend_expand -s ${device_id} -H ${host_id} -m ${soc} -b build-android
```
1 parent ff17dc2 commit f50db77

File tree

5 files changed

+44
-16
lines changed

5 files changed

+44
-16
lines changed

backends/qualcomm/_passes/expand_broadcast_tensor_shape.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,16 @@ def __init__(self):
2222
exir_ops.edge.aten.sub.Tensor,
2323
exir_ops.edge.aten.mul.Tensor,
2424
exir_ops.edge.aten.div.Tensor,
25+
# Support expand_copy when the rank of the input tensor is less than the rank of the output tensor.
26+
exir_ops.edge.aten.expand_copy.default,
2527
]
2628

2729
def traverse_broadcast_node(self, graph_module: torch.fx.GraphModule):
2830
for node in graph_module.graph.nodes:
2931
if node.target in self.broadcast_op_targets:
3032
for arg in node.args:
33+
if not isinstance(arg, torch.fx.Node):
34+
continue
3135
input_rank = len(arg.meta["val"].shape)
3236
output_rank = len(node.meta["val"].shape)
3337
if input_rank != output_rank:

backends/qualcomm/_passes/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def get_passes_dependency_for_capture_program():
107107
ConvertUpsampleBicubicWithBilinear: [RemoveRedundancy],
108108
DecomposeAny: [RemoveRedundancy],
109109
DecomposeLinalgVectorNorm: [RemoveRedundancy],
110-
ExpandBroadcastTensorShape: [RemoveRedundancy],
110+
ExpandBroadcastTensorShape: [FoldQDQ],
111111
FixedLinearKeepDim: [FoldQDQ],
112112
FoldQDQ: [AnnotateQuantAttrs, AnnotateStack, AnnotateUnbind],
113113
I64toI32: [ConvertUpsampleBicubicWithBilinear, RemoveRedundancy],

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,11 @@
6969
from collections import defaultdict
7070
from typing import List
7171

72-
from executorch.backends.qualcomm._passes import FoldQDQ, TagQuantIO
72+
from executorch.backends.qualcomm._passes import (
73+
ExpandBroadcastTensorShape,
74+
FoldQDQ,
75+
TagQuantIO,
76+
)
7377
from executorch.backends.qualcomm.builders.node_visitor import get_node_visitors
7478
from executorch.backends.qualcomm.debugger.utils import DrawGraph
7579
from executorch.examples.models.deeplab_v3 import DeepLabV3ResNet101Model
@@ -435,10 +439,20 @@ def test_qnn_backend_equal(self):
435439

436440
def test_qnn_backend_expand(self):
437441
modules = [ExpandAs(), ExpandCopy()] # noqa: F405
438-
sample_input = (torch.randn([3, 1]),)
439-
for i, module in enumerate(modules):
440-
with self.subTest(i=i):
441-
self.lower_module_and_test_output(module, sample_input)
442+
sample_inputs = [
443+
(torch.randn([3, 1]),),
444+
(torch.randn([4]),),
445+
]
446+
passes_job = get_capture_program_passes()
447+
passes_job[ExpandBroadcastTensorShape][QCOM_PASS_ACTIVATE_KEY] = True
448+
index = 0
449+
for module in modules:
450+
for sample_input in sample_inputs:
451+
with self.subTest(i=index):
452+
self.lower_module_and_test_output(
453+
module, sample_input, passes_job=passes_job
454+
)
455+
index += 1
442456

443457
def test_qnn_backend_expm1(self):
444458
sample_input = (torch.randn(3, 4, 5),)
@@ -1517,11 +1531,21 @@ def test_qnn_backend_equal(self):
15171531

15181532
def test_qnn_backend_expand(self):
15191533
modules = [ExpandAs(), ExpandCopy()] # noqa: F405
1520-
sample_input = (torch.randn([3, 1]),)
1521-
for i, module in enumerate(modules):
1522-
with self.subTest(i=i):
1523-
module = self.get_qdq_module(module, sample_input)
1524-
self.lower_module_and_test_output(module, sample_input)
1534+
sample_inputs = [
1535+
(torch.randn([3, 1]),),
1536+
(torch.randn([4]),),
1537+
]
1538+
passes_job = get_capture_program_passes()
1539+
passes_job[ExpandBroadcastTensorShape][QCOM_PASS_ACTIVATE_KEY] = True
1540+
index = 0
1541+
for module in modules:
1542+
for sample_input in sample_inputs:
1543+
with self.subTest(i=index):
1544+
module = self.get_qdq_module(module, sample_input)
1545+
self.lower_module_and_test_output(
1546+
module, sample_input, passes_job=passes_job
1547+
)
1548+
index += 1
15251549

15261550
def test_qnn_backend_expm1(self):
15271551
sample_input = (torch.randn(3, 4, 5),)

backends/qualcomm/tests/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
import subprocess
1010
import tempfile
1111
import unittest
12-
from typing import Callable, Dict, List, Optional, Tuple
12+
from typing import Callable, Dict, List, Optional, OrderedDict, Tuple
1313

1414
import numpy as np
1515
import torch
@@ -435,6 +435,7 @@ def lower_module_and_test_output(
435435
expected_profile_events: int = -1,
436436
expected_intermediate_events: int = -1,
437437
assert_output_equal: bool = True,
438+
passes_job: Optional[OrderedDict] = None,
438439
skip_node_id_set: set = None,
439440
skip_node_op_set: set = None,
440441
dynamic_shapes: Dict = None,
@@ -508,7 +509,6 @@ def get_qdq_module(
508509
block_size_map: Dict[str, Tuple] = None,
509510
submodule_qconfig_list: Optional[List[Tuple[Callable, ModuleQConfig]]] = None,
510511
) -> torch.fx.GraphModule:
511-
module = module.eval()
512512
m = torch.export.export(
513513
module, inputs, dynamic_shapes=dynamic_shapes, strict=True
514514
).module()

examples/qualcomm/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -262,7 +262,7 @@ def make_quantizer(
262262
per_channel_linear=False,
263263
act_observer=MovingAverageMinMaxObserver,
264264
is_qat=False,
265-
callback_qconfig_list: Optional[List[Tuple[Callable, ModuleQConfig]]] = None,
265+
submodule_qconfig_list: Optional[List[Tuple[Callable, ModuleQConfig]]] = None,
266266
):
267267
quantizer = QnnQuantizer()
268268
quantizer.add_custom_quant_annotations(custom_annotations)
@@ -273,8 +273,8 @@ def make_quantizer(
273273
is_linear_per_channel=per_channel_linear,
274274
act_observer=act_observer,
275275
)
276-
callback_qconfig_list = callback_qconfig_list or []
277-
quantizer.set_submodule_qconfig_list(callback_qconfig_list)
276+
submodule_qconfig_list = submodule_qconfig_list or []
277+
quantizer.set_submodule_qconfig_list(submodule_qconfig_list)
278278
return quantizer
279279

280280

0 commit comments

Comments
 (0)