pytorch · facebook-github-bot · Jan 22, 2025 · Dec 18, 2024
@@ -20,6 +20,7 @@ Please check `generate_qnn_executorch_compiler_spec()` in
 - Snapdragon 8 Gen 1+
 - Snapdragon 8 Gen 2
 - Snapdragon 8 Gen 3
+- Snapdragon 8 Elite
 
 ### Adding more supported Chipset
 Currently, users cannot add additional chipset models because the chipset ID is not accessible to community users. If you have specific chipset models you wish to add, please contact one of the authors in the `Code Reviews` section at the bottom of this page.
@@ -120,11 +121,9 @@ PRs are always welcome to help improve the codebase in a comprehensive manner. B
 
 - **Code Reviews**:<br/>
     Please ping authors in Qualcomm AI Engine Direct related PRs for reviewing, possible candidates are listed below:
-    - [chiwwang](https://github.com/chiwwang)
     - [shewu-quic](https://github.com/shewu-quic)
     - [chunit-quic](https://github.com/chunit-quic)
     - [winskuo-quic](https://github.com/winskuo-quic)
-    - [chuntl](https://github.com/chuntl)
     - [haowhsu-quic](https://github.com/haowhsu-quic)
 
 Thanks again for your contribution!
@@ -110,11 +110,11 @@ def _convert_to_linear(
         # Since QNN has no keep dims for linear op, we will need to add squeeze and unsqueeze around linear node
         # TODO: Find a more general conditional statement.
         linear_output = linear_node.meta["val"]
-        if linear_output.dim() == 3 and linear_output.shape[0] == 1:
+        if linear_output.dim() >= 3:
             with gm.graph.inserting_after(input_node):
                 input_users = list(input_node.users.keys())
                 input_tensor = input_node.meta["val"]
-                squeeze_dim = input_tensor.shape[-2:]
+                squeeze_dim = (-1, input_tensor.shape[-1])
                 squeeze_node = gm.graph.create_node(
                     "call_function",
                     self.view_copy,
@@ -149,7 +149,7 @@ def _convert_to_linear(
                     unsqueeze_node.meta[k] = v
                 # update linear node's shape
                 linear_node.meta["val"] = linear_output.reshape(
-                    linear_output.shape[-2:]
+                    (squeeze_node.meta["val"].shape[0], linear_output.shape[-1])
                 )
                 for user in output_users:
                     user.replace_input_with(linear_node, unsqueeze_node)

@@ -42,34 +42,49 @@ class LayoutTransform(ExportPass):
     }
 
     layout_agnostic_ops = {
+        exir_ops.edge.aten.abs.default,
         exir_ops.edge.aten.add.Tensor,
         exir_ops.edge.aten.bmm.default,
         exir_ops.edge.aten.cat.default,
         exir_ops.edge.aten.ceil.default,
         exir_ops.edge.aten.clamp.default,
         exir_ops.edge.aten.constant_pad_nd.default,
         exir_ops.edge.aten.div.Tensor,
+        exir_ops.edge.aten.eq.Scalar,
+        exir_ops.edge.aten.eq.Tensor,
         exir_ops.edge.aten.full.default,
+        exir_ops.edge.aten.ge.Scalar,
+        exir_ops.edge.aten.ge.Tensor,
         exir_ops.edge.aten.gelu.default,
+        exir_ops.edge.aten.gt.Scalar,
+        exir_ops.edge.aten.gt.Tensor,
         exir_ops.edge.aten.hardswish.default,
         exir_ops.edge.aten.hardsigmoid.default,
         exir_ops.edge.aten.hardtanh.default,
         exir_ops.edge.aten.leaky_relu.default,
+        exir_ops.edge.aten.le.Scalar,
+        exir_ops.edge.aten.le.Tensor,
         exir_ops.edge.aten.linear.default,
+        exir_ops.edge.aten.log.default,
+        exir_ops.edge.aten.lt.Scalar,
+        exir_ops.edge.aten.lt.Tensor,
         exir_ops.edge.aten._log_softmax.default,
+        exir_ops.edge.aten.maximum.default,
         exir_ops.edge.aten.mean.dim,
+        exir_ops.edge.aten.minimum.default,
         exir_ops.edge.aten.mul.Tensor,
         exir_ops.edge.aten.pow.Tensor_Scalar,
         exir_ops.edge.aten.prelu.default,
+        exir_ops.edge.aten.repeat.default,
         exir_ops.edge.aten.relu.default,
         exir_ops.edge.aten._softmax.default,  # TODO: Need to find a new solution to do "axis_order" to transform axis.
         exir_ops.edge.aten.sigmoid.default,
+        exir_ops.edge.aten.split_with_sizes.default,
         exir_ops.edge.aten.sqrt.default,
         exir_ops.edge.aten.sub.Tensor,
         exir_ops.edge.aten.sum.dim_IntList,
         exir_ops.edge.aten.topk.default,
         exir_ops.edge.aten._to_copy.default,
-        exir_ops.edge.aten.split_with_sizes.default,
         *q_ops,
         *dq_ops,
         _operator.getitem,

@@ -21,9 +21,8 @@ def __init__(self, quantization_capture=False):
         self.view_target = exir_ops.edge.aten.view_copy.default
         self.op = exir_ops.edge.aten.pixel_unshuffle.default
 
-        self.quantization_capture = quantization_capture
         if quantization_capture:
-            self.reshape_target = torch.ops.aten._unsafe_view.default
+            self.reshape_target = torch.ops.aten.reshape.default
             self.permute_target = torch.ops.aten.permute.default
             self.view_target = torch.ops.aten.view.default
             self.op = torch.ops.aten.pixel_unshuffle.default
@@ -35,12 +34,7 @@ def call(self, graph_module: torch.fx.GraphModule):
             if node.op == "call_function" and node.target == self.reshape_target:
                 with graph.inserting_after(node):
 
-                    # Clone op still exists between permute and reshape_target during quantization,
-                    # so we need to check for args[0].args[0] to get permute node
-                    if self.quantization_capture:
-                        premute_node = node.args[0].args[0]
-                    else:
-                        premute_node = node.args[0]
+                    premute_node = node.args[0]
                     if any(
                         [
                             len(node.args[1]) != 4,

@@ -14,31 +14,37 @@ class RemoveRedundancy(ExportPass):
     Trim certain operators to reduce unnecessary overhead.
     """
 
-    redundant_ops = {
-        torch.clone,
-        torch.ops.aten.clone.default,
-        exir_ops.edge.aten.clone.default,
-        torch.ops.aten.alias.default,
-        exir_ops.edge.aten.alias.default,
-        exir_ops.edge.aten.lift_fresh_copy.default,
-        # remove this target if '_skip_dim_order' is set to False
-        exir_ops.edge.dim_order_ops._to_dim_order_copy.default,
-        # remove channel_last / contiguous _to_copy if '_skip_dim_order' is set to True
-        exir_ops.edge.aten._to_copy.default,
-    }
-
     def __init__(self):
         super(RemoveRedundancy, self).__init__()
+        # Dispatch table: operator target -> predicate taking the node.
+        # `_remove` skips a node unless its target is listed here and the
+        # associated predicate returns True for that node.
+        unconditional_targets = (
+            torch.clone,
+            torch.ops.aten.clone.default,
+            exir_ops.edge.aten.clone.default,
+            torch.ops.aten.alias.default,
+            exir_ops.edge.aten.alias.default,
+            exir_ops.edge.aten.lift_fresh_copy.default,
+        )
+        self.redundant_ops = {
+            op: self._default_condition for op in unconditional_targets
+        }
+        # remove this target if '_skip_dim_order' is set to False
+        self.redundant_ops[
+            exir_ops.edge.dim_order_ops._to_dim_order_copy.default
+        ] = self._dim_order_op_condition
+        # remove channel_last / contiguous _to_copy if '_skip_dim_order' is set to True
+        self.redundant_ops[
+            exir_ops.edge.aten._to_copy.default
+        ] = self._to_copy_op_condition
+
+    def _dim_order_op_condition(self, node):
+        """Return True when ``node`` carries a non-identity ``dim_order`` hint.
+
+        Args:
+            node: Graph node whose ``kwargs`` may contain a ``dim_order``
+                sequence of dimension indices.
+
+        Returns:
+            True if ``dim_order`` is present and differs from the identity
+            order ``[0, 1, ..., n - 1]``; False if it is missing, None, or
+            equal to the identity order.
+        """
+        dim_order = node.kwargs.get("dim_order")
+        if dim_order is None:
+            # no layout hint was given, so there is nothing layout-related to trim
+            return False
+        # the op is treated as redundant only if it contains a layout hint
+        # e.g. (0, 2, 3, 1) != (0, 1, 2, 3)
+        # normalize to list so a tuple-typed dim_order is compared by value
+        return list(dim_order) != list(range(len(dim_order)))
+
+    def _to_copy_op_condition(self, node):
+        """Return True when ``node`` was given a ``memory_format`` keyword."""
+        # key-presence test only: the value of ``memory_format`` is not inspected
+        kwarg_names = node.kwargs.keys()
+        return "memory_format" in kwarg_names
+
+    def _default_condition(self, node):
+        """Accept every node unconditionally.
+
+        Args:
+            node: Graph node being examined; unused, accepted only so this
+                predicate has the same signature as the other conditions.
+
+        Returns:
+            Always True.
+        """
+        return True
 
     def _remove(self, graph_module: torch.fx.GraphModule) -> torch.fx.GraphModule:
         for n in graph_module.graph.nodes:
-            if n.target not in self.redundant_ops:
-                continue
-
-            # do not remove cast operator
-            if (
-                n.target == exir_ops.edge.aten._to_copy.default
-                and "memory_format" not in n.kwargs
+            if n.target not in self.redundant_ops or not self.redundant_ops[n.target](
+                n
             ):
                 continue
 

@@ -6,7 +6,9 @@
 
 from . import (
     node_visitor,
+    op_abs,
     op_add,
+    op_arange,
     op_avg_pool2d,
     op_batch_norm,
     op_bmm,
@@ -19,26 +21,36 @@
     op_dequantize,
     op_div,
     op_embedding,
+    op_eq,
     op_expand,
+    op_full_like,
+    op_ge,
     op_gelu,
     op_group_norm,
+    op_gt,
     op_hardsigmoid,
     op_hardswish,
     op_hardtanh,
     op_index,
     op_index_put,
     op_layer_norm,
+    op_le,
     op_linear,
+    op_log,
     op_log_softmax,
+    op_lt,
     op_matmul,
+    op_max,
     op_max_pool2d,
     op_mean_dim,
+    op_min,
     op_mul,
     op_pad,
     op_pow,
     op_prelu,
     op_quantize,
     op_relu,
+    op_repeat,
     op_reshape,
     op_rms_norm,
     op_rsqrt,
@@ -65,7 +77,9 @@
 
 __all__ = [
     node_visitor,
+    op_abs,
     op_add,
+    op_arange,
     op_avg_pool2d,
     op_batch_norm,
     op_bmm,
@@ -78,26 +92,36 @@
     op_dequantize,
     op_div,
     op_embedding,
+    op_eq,
     op_expand,
+    op_full_like,
+    op_ge,
     op_gelu,
     op_group_norm,
+    op_gt,
     op_hardswish,
     op_hardtanh,
     op_hardsigmoid,
     op_index,
     op_index_put,
     op_layer_norm,
+    op_le,
     op_linear,
+    op_log,
     op_log_softmax,
+    op_lt,
     op_matmul,
+    op_max,
     op_max_pool2d,
     op_mean_dim,
+    op_min,
     op_mul,
     op_pad,
     op_pow,
     op_prelu,
     op_quantize,
     op_relu,
+    op_repeat,
     op_reshape,
     op_rms_norm,
     op_rsqrt,

@@ -0,0 +1,56 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import torch
+
+from .node_visitor import NodeVisitor, register_node_visitor
+from .qnn_constants import OpElementWiseAbs, QNN_OP_PACKAGE_NAME_QTI_AISW
+
+
+@register_node_visitor
+class Abs(NodeVisitor):
+    """Node visitor that maps ``aten.abs.default`` onto ``OpElementWiseAbs``."""
+
+    target = ["aten.abs.default"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        """Build the op wrapper for a single ``abs`` node.
+
+        Args:
+            node: The FX node being lowered; ``node.args[0]`` is its operand.
+            nodes_to_wrappers: Mapping handed to ``define_tensor`` for both
+                tensors (presumably a registry of already-defined wrappers --
+                confirm against ``NodeVisitor.define_tensor``).
+
+        Returns:
+            An op wrapper named after ``node`` with one input tensor wrapper
+            and one output tensor wrapper attached.
+        """
+        native = PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_NATIVE
+
+        # The output tensor is defined before the operand; the order is kept
+        # because both calls receive the same ``nodes_to_wrappers`` mapping.
+        result_wrapper = self.define_tensor(
+            node,
+            node,
+            self.get_tensor(node, node),
+            native,
+            nodes_to_wrappers,
+        )
+
+        operand = node.args[0]
+        operand_wrapper = self.define_tensor(
+            operand,
+            node,
+            self.get_tensor(operand, node),
+            native,
+            nodes_to_wrappers,
+        )
+
+        op_wrapper = PyQnnWrapper.PyQnnOpWrapper(
+            node.name,
+            QNN_OP_PACKAGE_NAME_QTI_AISW,
+            OpElementWiseAbs.op_name,
+        )
+        op_wrapper.AddInputTensors([operand_wrapper])
+        op_wrapper.AddOutputTensors([result_wrapper])
+        return op_wrapper
@@ -0,0 +1,37 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+from typing import Dict
+
+import executorch.backends.qualcomm.python.PyQnnWrapperAdaptor as PyQnnWrapper
+
+import torch
+
+from .node_visitor import NodeVisitor, register_node_visitor
+
+
+@register_node_visitor
+class Arange(NodeVisitor):
+    """Node visitor that turns ``aten.arange.start_step`` into a static tensor."""
+
+    target = ["aten.arange.start_step"]
+
+    def __init__(self, *args) -> None:
+        super().__init__(*args)
+
+    def define_node(
+        self,
+        node: torch.fx.Node,
+        nodes_to_wrappers: Dict[torch.fx.Node, PyQnnWrapper.TensorWrapper],
+    ) -> PyQnnWrapper.PyQnnOpWrapper:
+        """Evaluate the range ahead of time and define it as a static tensor.
+
+        Args:
+            node: The FX node being lowered. ``node.args`` holds
+                ``(start, end[, step])``; ``step`` defaults to 1 when omitted.
+                An optional ``dtype`` entry in ``node.kwargs`` is honored.
+            nodes_to_wrappers: Mapping handed to ``define_tensor``.
+
+        Returns:
+            Nothing is returned explicitly, so callers receive ``None``
+            despite the annotation; the only effect is the ``define_tensor``
+            call for the computed values.
+        """
+        start, end = node.args[0:2]
+        step = node.args[2] if len(node.args) > 2 else 1
+        # Pass the requested dtype through so the constant matches the traced
+        # op; ``None`` keeps torch's default dtype inference from the arguments.
+        out_tensor = torch.arange(start, end, step, dtype=node.kwargs.get("dtype"))
+
+        # The values are known at this point, so the tensor is defined as
+        # static data rather than as the output of an op.
+        self.define_tensor(
+            node,
+            node,
+            out_tensor,
+            PyQnnWrapper.Qnn_TensorType_t.QNN_TENSOR_TYPE_STATIC,
+            nodes_to_wrappers,
+        )
@@ -238,7 +238,7 @@ def _define_conv1d(
             padding_shape,
             dilation,
             dilation_shape,
-            groups,
+            groups=groups,
         )
         op_wrapper_list.append(conv_op)