Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions backends/qualcomm/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Please check `generate_qnn_executorch_compiler_spec()` in
- Snapdragon 8 Gen 1+
- Snapdragon 8 Gen 2
- Snapdragon 8 Gen 3
- Snapdragon 8 Elite

### Adding more supported Chipset
Currently, users cannot add additional chipset models because the chipset ID is not accessible to community users. If you have specific chipset models you wish to add, please contact one of the authors in the `Code Reviews` section at the bottom of this page.
Expand Down Expand Up @@ -120,11 +121,9 @@ PRs are always welcome to help improve the codebase in a comprehensive manner. B

- **Code Reviews**:<br/>
Please ping the authors on Qualcomm AI Engine Direct related PRs for review; possible candidates are listed below:
- [chiwwang](https://github.com/chiwwang)
- [shewu-quic](https://github.com/shewu-quic)
- [chunit-quic](https://github.com/chunit-quic)
- [winskuo-quic](https://github.com/winskuo-quic)
- [chuntl](https://github.com/chuntl)
- [haowhsu-quic](https://github.com/haowhsu-quic)

Thanks again for your contribution!
6 changes: 3 additions & 3 deletions backends/qualcomm/_passes/convert_to_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,11 @@ def _convert_to_linear(
# Since QNN has no keep dims for linear op, we will need to add squeeze and unsqueeze around linear node
# TODO: Find a more general conditional statement.
linear_output = linear_node.meta["val"]
if linear_output.dim() == 3 and linear_output.shape[0] == 1:
if linear_output.dim() >= 3:
with gm.graph.inserting_after(input_node):
input_users = list(input_node.users.keys())
input_tensor = input_node.meta["val"]
squeeze_dim = input_tensor.shape[-2:]
squeeze_dim = (-1, input_tensor.shape[-1])
squeeze_node = gm.graph.create_node(
"call_function",
self.view_copy,
Expand Down Expand Up @@ -149,7 +149,7 @@ def _convert_to_linear(
unsqueeze_node.meta[k] = v
# update linear node's shape
linear_node.meta["val"] = linear_output.reshape(
linear_output.shape[-2:]
(squeeze_node.meta["val"].shape[0], linear_output.shape[-1])
)
for user in output_users:
user.replace_input_with(linear_node, unsqueeze_node)
Expand Down
17 changes: 16 additions & 1 deletion backends/qualcomm/_passes/layout_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,34 +42,49 @@ class LayoutTransform(ExportPass):
}

layout_agnostic_ops = {
exir_ops.edge.aten.abs.default,
exir_ops.edge.aten.add.Tensor,
exir_ops.edge.aten.bmm.default,
exir_ops.edge.aten.cat.default,
exir_ops.edge.aten.ceil.default,
exir_ops.edge.aten.clamp.default,
exir_ops.edge.aten.constant_pad_nd.default,
exir_ops.edge.aten.div.Tensor,
exir_ops.edge.aten.eq.Scalar,
exir_ops.edge.aten.eq.Tensor,
exir_ops.edge.aten.full.default,
exir_ops.edge.aten.ge.Scalar,
exir_ops.edge.aten.ge.Tensor,
exir_ops.edge.aten.gelu.default,
exir_ops.edge.aten.gt.Scalar,
exir_ops.edge.aten.gt.Tensor,
exir_ops.edge.aten.hardswish.default,
exir_ops.edge.aten.hardsigmoid.default,
exir_ops.edge.aten.hardtanh.default,
exir_ops.edge.aten.leaky_relu.default,
exir_ops.edge.aten.le.Scalar,
exir_ops.edge.aten.le.Tensor,
exir_ops.edge.aten.linear.default,
exir_ops.edge.aten.log.default,
exir_ops.edge.aten.lt.Scalar,
exir_ops.edge.aten.lt.Tensor,
exir_ops.edge.aten._log_softmax.default,
exir_ops.edge.aten.maximum.default,
exir_ops.edge.aten.mean.dim,
exir_ops.edge.aten.minimum.default,
exir_ops.edge.aten.mul.Tensor,
exir_ops.edge.aten.pow.Tensor_Scalar,
exir_ops.edge.aten.prelu.default,
exir_ops.edge.aten.repeat.default,
exir_ops.edge.aten.relu.default,
exir_ops.edge.aten._softmax.default, # TODO: Need to find a new solution to do "axis_order" to transform axis.
exir_ops.edge.aten.sigmoid.default,
exir_ops.edge.aten.split_with_sizes.default,
exir_ops.edge.aten.sqrt.default,
exir_ops.edge.aten.sub.Tensor,
exir_ops.edge.aten.sum.dim_IntList,
exir_ops.edge.aten.topk.default,
exir_ops.edge.aten._to_copy.default,
exir_ops.edge.aten.split_with_sizes.default,
*q_ops,
*dq_ops,
_operator.getitem,
Expand Down
10 changes: 2 additions & 8 deletions backends/qualcomm/_passes/recompose_pixel_unshuffle.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ def __init__(self, quantization_capture=False):
self.view_target = exir_ops.edge.aten.view_copy.default
self.op = exir_ops.edge.aten.pixel_unshuffle.default

self.quantization_capture = quantization_capture
if quantization_capture:
self.reshape_target = torch.ops.aten._unsafe_view.default
self.reshape_target = torch.ops.aten.reshape.default
self.permute_target = torch.ops.aten.permute.default
self.view_target = torch.ops.aten.view.default
self.op = torch.ops.aten.pixel_unshuffle.default
Expand All @@ -35,12 +34,7 @@ def call(self, graph_module: torch.fx.GraphModule):
if node.op == "call_function" and node.target == self.reshape_target:
with graph.inserting_after(node):

# Clone op still exists between permute and reshape_target during quantization,
# so we need to check for args[0].args[0] to get permute node
if self.quantization_capture:
premute_node = node.args[0].args[0]
else:
premute_node = node.args[0]
premute_node = node.args[0]
if any(
[
len(node.args[1]) != 4,
Expand Down
46 changes: 26 additions & 20 deletions backends/qualcomm/_passes/remove_redundancy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,31 +14,37 @@ class RemoveRedundancy(ExportPass):
Trim certain operators to reduce unnecessary overhead.
"""

redundant_ops = {
torch.clone,
torch.ops.aten.clone.default,
exir_ops.edge.aten.clone.default,
torch.ops.aten.alias.default,
exir_ops.edge.aten.alias.default,
exir_ops.edge.aten.lift_fresh_copy.default,
# remove this target if '_skip_dim_order' is set to False
exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
# remove channel_last / contiguous _to_copy if '_skip_dim_order' is set to True
exir_ops.edge.aten._to_copy.default,
}

def __init__(self):
    """Set up the table of op targets this pass treats as redundant.

    ``self.redundant_ops`` maps each op target to a bound predicate that
    takes a graph node and returns a bool; ``_remove`` looks a node's target
    up in this table and calls the predicate before going any further with
    that node.
    """
    super(RemoveRedundancy, self).__init__()
    self.redundant_ops = {
        torch.clone: self._default_condition,
        torch.ops.aten.clone.default: self._default_condition,
        exir_ops.edge.aten.clone.default: self._default_condition,
        torch.ops.aten.alias.default: self._default_condition,
        exir_ops.edge.aten.alias.default: self._default_condition,
        exir_ops.edge.aten.lift_fresh_copy.default: self._default_condition,
        # remove this target if '_skip_dim_order' is set to False
        exir_ops.edge.dim_order_ops._to_dim_order_copy.default: self._dim_order_op_condition,
        # remove channel_last / contiguous _to_copy if '_skip_dim_order' is set to True
        exir_ops.edge.aten._to_copy.default: self._to_copy_op_condition,
    }

def _dim_order_op_condition(self, node):
dim_order = node.kwargs.get("dim_order")
# skip if there contains layout hint
# e.g. (0, 2, 3, 1) != (0, 1, 2, 3)
return dim_order != list(range(len(dim_order)))

def _to_copy_op_condition(self, node):
    """Return True when ``node.kwargs`` contains a ``memory_format`` entry.

    NOTE(review): presumably a ``_to_copy`` without ``memory_format`` is a
    plain cast that must be kept -- confirm against the pass's intent.
    """
    return "memory_format" in node.kwargs

def _default_condition(self, ndoe):
    """Predicate that accepts every node unconditionally.

    The argument is ignored.
    NOTE(review): the parameter name ``ndoe`` looks like a typo of ``node``;
    it is left as-is so the signature stays unchanged.
    """
    return True

def _remove(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
for n in graph_module.graph.nodes:
if n.target not in self.redundant_ops:
continue

# do not remove cast operator
if (
n.target == exir_ops.edge.aten._to_copy.default
and "memory_format" not in n.kwargs
if n.target not in self.redundant_ops or not self.redundant_ops[n.target](
n
):
continue

Expand Down
24 changes: 24 additions & 0 deletions backends/qualcomm/builders/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

from . import (
node_visitor,
op_abs,
op_add,
op_arange,
op_avg_pool2d,
op_batch_norm,
op_bmm,
Expand All @@ -19,26 +21,36 @@
op_dequantize,
op_div,
op_embedding,
op_eq,
op_expand,
op_full_like,
op_ge,
op_gelu,
op_group_norm,
op_gt,
op_hardsigmoid,
op_hardswish,
op_hardtanh,
op_index,
op_index_put,
op_layer_norm,
op_le,
op_linear,
op_log,
op_log_softmax,
op_lt,
op_matmul,
op_max,
op_max_pool2d,
op_mean_dim,
op_min,
op_mul,
op_pad,
op_pow,
op_prelu,
op_quantize,
op_relu,
op_repeat,
op_reshape,
op_rms_norm,
op_rsqrt,
Expand All @@ -65,7 +77,9 @@

__all__ = [
node_visitor,
op_abs,
op_add,
op_arange,
op_avg_pool2d,
op_batch_norm,
op_bmm,
Expand All @@ -78,26 +92,36 @@
op_dequantize,
op_div,
op_embedding,
op_eq,
op_expand,
op_full_like,
op_ge,
op_gelu,
op_group_norm,
op_gt,
op_hardswish,
op_hardtanh,
op_hardsigmoid,
op_index,
op_index_put,
op_layer_norm,
op_le,
op_linear,
op_log,
op_log_softmax,
op_lt,
op_matmul,
op_max,
op_max_pool2d,
op_mean_dim,
op_min,
op_mul,
op_pad,
op_pow,
op_prelu,
op_quantize,
op_relu,
op_repeat,
op_reshape,
op_rms_norm,
op_rsqrt,
Expand Down
56 changes: 56 additions & 0 deletions backends/qualcomm/builders/op_abs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch

from .node_visitor import NodeVisitor, register_node_visitor
from .qnn_constants import OpElementWiseAbs, QNN_OP_PACKAGE_NAME_QTI_AISW


@register_node_visitor
class Abs(NodeVisitor):
    """Node visitor registered for the ``aten.abs.default`` target."""

    target = ["aten.abs.default"]

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        """Build the op wrapper for a single abs node.

        Args:
            node: The graph node being lowered; its first argument is the
                input node.
            nodes_to_wrappers: Mapping handed through to ``define_tensor``
                for both the output and the input tensor.

        Returns:
            An op wrapper named after ``node.name`` that uses
            ``OpElementWiseAbs.op_name``, with one input tensor and one
            output tensor attached.
        """
        native = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE

        # The output tensor is defined first, then the input tensor.
        result_wrapper = self.define_tensor(
            node,
            node,
            self.get_tensor(node, node),
            native,
            nodes_to_wrappers,
        )

        source_node = node.args[0]
        operand_wrapper = self.define_tensor(
            source_node,
            node,
            self.get_tensor(source_node, node),
            native,
            nodes_to_wrappers,
        )

        op_wrapper = PyQnnWrapper.PyQnnOpWrapper(
            node.name,
            QNN_OP_PACKAGE_NAME_QTI_AISW,
            OpElementWiseAbs.op_name,
        )
        op_wrapper.AddInputTensors([operand_wrapper])
        op_wrapper.AddOutputTensors([result_wrapper])
        return op_wrapper
37 changes: 37 additions & 0 deletions backends/qualcomm/builders/op_arange.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Copyright (c) Qualcomm Innovation Center, Inc.
# All rights reserved
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from typing import Dict

import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper

import torch

from .node_visitor import NodeVisitor, register_node_visitor


@register_node_visitor
class Arange(NodeVisitor):
    """Node visitor registered for the ``aten.arange.start_step`` target."""

    target = ["aten.arange.start_step"]

    def __init__(self, *args) -> None:
        super().__init__(*args)

    def define_node(
        self,
        node: torch.fx.Node,
        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
    ) -> PyQnnWrapper.PyQnnOpWrapper:
        """Define a static tensor holding the values of an arange node.

        Args:
            node: The graph node being lowered; ``node.args`` holds
                ``start``, ``end`` and optionally ``step`` (``1`` is used
                when no third argument is present).
            nodes_to_wrappers: Mapping handed through to ``define_tensor``.
        """
        call_args = node.args
        start, end = call_args[0:2]
        step = call_args[2] if len(call_args) > 2 else 1

        # The range is evaluated here with torch.arange and registered as a
        # STATIC tensor.
        self.define_tensor(
            node,
            node,
            torch.arange(start, end, step),
            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
            nodes_to_wrappers,
        )
        # NOTE(review): no op wrapper is returned despite the return
        # annotation; presumably the caller treats None as "no op to add" --
        # confirm.
2 changes: 1 addition & 1 deletion backends/qualcomm/builders/op_conv2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def _define_conv1d(
padding_shape,
dilation,
dilation_shape,
groups,
groups=groups,
)
op_wrapper_list.append(conv_op)

Expand Down
Loading
Loading