pytorch
diff --git a/‎backends/qualcomm/aot/wrappers/TensorWrapper.cpp‎
Lines changed: 3 additions & 1 deletion b/‎backends/qualcomm/aot/wrappers/TensorWrapper.cpp‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎backends/qualcomm/builders/op_avg_pool2d.py‎
Lines changed: 2 additions & 2 deletions b/‎backends/qualcomm/builders/op_avg_pool2d.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/qualcomm/passes/convert_to_linear.py‎
Lines changed: 24 additions & 23 deletions b/‎backends/qualcomm/passes/convert_to_linear.py‎
Lines changed: 24 additions & 23 deletions
diff --git a/‎backends/qualcomm/passes/expand_broadcast_tensor_shape.py‎
Lines changed: 58 additions & 0 deletions b/‎backends/qualcomm/passes/expand_broadcast_tensor_shape.py‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎backends/qualcomm/quantizer/quantizer.py‎
Lines changed: 12 additions & 7 deletions b/‎backends/qualcomm/quantizer/quantizer.py‎
Lines changed: 12 additions & 7 deletions
diff --git a/‎backends/qualcomm/quantizer/utils.py‎
Lines changed: 104 additions & 2 deletions b/‎backends/qualcomm/quantizer/utils.py‎
Lines changed: 104 additions & 2 deletions
@@ -91,7 +91,9 @@ TensorWrapper::TensorWrapper(
   if (data != nullptr) {
     QNN_VER_PTR(tensor_)->clientBuf.dataSize = bytes;
 
-    if (copy_data) {
+    if (tensor_type != QNN_TENSOR_TYPE_STATIC) {
+      QNN_VER_PTR(tensor_)->clientBuf.data = nullptr;
+    } else if (copy_data) {
       owned_data_ = std::make_unique<char[]>(bytes);
       const char* src_data = static_cast<const char*>(data);
       std::memcpy(owned_data_.get(), src_data, bytes);
 
@@ -51,8 +51,8 @@ def define_node(
             filter_size = filter_size + filter_size
         filter_size_shape = [len(filter_size)]
 
-        # stride info
-        stride = cast(List[int], node.args[2])
+        # stride info - default to kernel_size if not given
+        stride = cast(List[int], node.args[2]) if len(node.args) > 2 else filter_size
         if len(stride) == 1:
             stride = stride + stride
         stride_shape = [len(stride)]
 
@@ -109,49 +109,50 @@ def _convert_to_linear(
 
         # Since QNN has no keep dims for linear op, we will need to add squeeze and unsqueeze around linear node
         # TODO: Find a more general conditional statement.
-        if (
-            fn_node.target == self.add
-            and linear_node.meta["val"].dim() == 3
-            and linear_node.meta["val"].shape[0] == 1
-        ):
-            squeeze_dim = linear_node.meta["val"].shape[1:]
-            linear_node.meta["val"] = torch.squeeze(linear_node.meta["val"], 0)
+        linear_output = linear_node.meta["val"]
+        if linear_output.dim() == 3 and linear_output.shape[0] == 1:
             with gm.graph.inserting_after(input_node):
                 input_users = list(input_node.users.keys())
-                squeeze_dim = linear_node.meta["val"].shape
-                squeeze_view_copy_node = gm.graph.create_node(
+                input_tensor = input_node.meta["val"]
+                squeeze_dim = input_tensor.shape[-2:]
+                squeeze_node = gm.graph.create_node(
                     "call_function",
                     self.view_copy,
                     (
                         input_node,
                         squeeze_dim,
                     ),
                 )
-                squeeze_view_copy_node.meta = linear_node.meta
+                # meta needs to be copied elementwisely for fake-tensor
+                # to be updated correctly and not affect meta of input_node
+                for k, v in input_node.meta.items():
+                    squeeze_node.meta[k] = v
+                squeeze_node.meta["val"] = input_tensor.reshape(squeeze_dim)
                 for user in input_users:
                     if user == linear_node:
-                        user.replace_input_with(input_node, squeeze_view_copy_node)
-            with gm.graph.inserting_after(output):
+                        user.replace_input_with(input_node, squeeze_node)
+
+            with gm.graph.inserting_after(linear_node):
                 output_users = list(linear_node.users.keys())
-                unsqueeze_dim = output.args[0].meta["val"].shape
-                unsqueeze_view_copy_node = gm.graph.create_node(
+                unsqueeze_dim = linear_output.shape
+                unsqueeze_node = gm.graph.create_node(
                     "call_function",
                     self.view_copy,
                     (
                         linear_node,
                         unsqueeze_dim,
                     ),
                 )
-                unsqueeze_view_copy_node.meta = output.args[0].meta
+                # meta needs to be copied elementwisely for fake-tensor
+                # to be updated correctly and not affect meta of unsqueeze_node
+                for k, v in linear_node.meta.items():
+                    unsqueeze_node.meta[k] = v
+                # update linear node's shape
+                linear_node.meta["val"] = linear_output.reshape(
+                    linear_output.shape[-2:]
+                )
                 for user in output_users:
-                    user.replace_input_with(linear_node, unsqueeze_view_copy_node)
-            if QCOM_QUANT_ATTRS in linear_node.meta:
-                squeeze_view_copy_node.meta[QCOM_QUANT_ATTRS] = linear_node.meta[
-                    QCOM_QUANT_ATTRS
-                ]
-                unsqueeze_view_copy_node.meta[QCOM_QUANT_ATTRS] = linear_node.meta[
-                    QCOM_QUANT_ATTRS
-                ]
+                    user.replace_input_with(linear_node, unsqueeze_node)
 
     def _extract_mm_ops(self, partitioned_nodes: List[edge_op]) -> List[torch.fx.Node]:
         mm_node = [n for n in partitioned_nodes if n.target == self.mm][0]
 
@@ -0,0 +1,58 @@
+# Copyright (c) Qualcomm Innovation Center, Inc.
+# All rights reserved
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import torch
+from executorch.exir.dialects._ops import ops as exir_ops
+from executorch.exir.pass_base import ExportPass, PassResult
+from executorch.exir.passes import dead_code_elimination_pass
+
+
+class ExpandBroadcastTensorShape(ExportPass):
+    """
+    Make tensors have same rank for layout-transform to work properly.
+
+    For every add / sub / mul / div edge op whose operand rank differs from
+    the rank of the op's output, a ``view_copy`` node is inserted right after
+    that operand, reshaping it to the output rank by prepending size-1
+    dimensions.
+    """
+
+    def __init__(self):
+        super(ExpandBroadcastTensorShape, self).__init__()
+        # binary element-wise ops that are inspected for rank mismatch
+        self.broadcast_op_targets = [
+            exir_ops.edge.aten.add.Tensor,
+            exir_ops.edge.aten.sub.Tensor,
+            exir_ops.edge.aten.mul.Tensor,
+            exir_ops.edge.aten.div.Tensor,
+        ]
+
+    def traverse_broadcast_node(self, graph_module: torch.fx.GraphModule):
+        """
+        Insert rank-expanding ``view_copy`` nodes in front of broadcast ops.
+
+        Args:
+            graph_module: graph to be rewritten in place. Nodes are expected
+                to carry a fake tensor under ``meta["val"]``.
+        """
+        for node in graph_module.graph.nodes:
+            if node.target not in self.broadcast_op_targets:
+                continue
+
+            # rank of the op's output is the same for every operand
+            output_rank = len(node.meta["val"].shape)
+            for arg in node.args:
+                # operands that are not graph nodes (e.g. python scalars)
+                # carry no meta and have no rank to expand
+                if not isinstance(arg, torch.fx.Node):
+                    continue
+
+                input_shape = arg.meta["val"].shape
+                input_rank = len(input_shape)
+                if input_rank == output_rank:
+                    continue
+
+                with graph_module.graph.inserting_after(arg):
+                    # left-pad the operand's shape with 1s up to output rank
+                    new_rank = [1] * (output_rank - input_rank) + list(input_shape)
+                    # snapshot users before the reshape node becomes one
+                    users = list(arg.users.keys())
+                    reshape_node = graph_module.graph.create_node(
+                        "call_function",
+                        exir_ops.edge.aten.view_copy.default,
+                        (arg, tuple(new_rank)),
+                    )
+                    # meta needs to be copied elementwisely for fake-tensor
+                    # to be updated correctly and not affect meta of arg
+                    for k, v in arg.meta.items():
+                        reshape_node.meta[k] = v
+                    reshape_node.meta["val"] = reshape_node.meta["val"].reshape(
+                        new_rank
+                    )
+                    # NOTE(review): every pre-existing user of arg is rewired,
+                    # not only the current broadcast op - confirm that other
+                    # consumers tolerate the expanded rank
+                    for user in users:
+                        user.replace_input_with(arg, reshape_node)
+
+    def call(self, graph_module: torch.fx.GraphModule):
+        """Run the rewrite, recompile the module and drop dead nodes."""
+        self.traverse_broadcast_node(graph_module)
+        graph_module.recompile()
+        dead_code_elimination_pass(graph_module)
+        # the graph is always reported as modified
+        return PassResult(graph_module, True)
@@ -26,7 +26,7 @@
     get_16a8w_qnn_ptq_config,
     get_default_16bit_qnn_ptq_config,
     get_default_8bit_qnn_ptq_config,
-    get_ptq_per_channel_weight_config,
+    get_ptq_per_channel_quant_config,
     OP_ANNOTATOR,
     QuantizationConfig,
 )
@@ -72,6 +72,7 @@ def __init__(self):
             "8bit_act": torch.int8,
             "16bit_act": torch.int16,
         }
+        self.per_channel_quant_config = None
 
     def _annotate(self, gm: GraphModule) -> None:
         for node in gm.graph.nodes:
@@ -96,13 +97,17 @@ def _get_quant_config(self, op: str | OpOverload) -> Optional[QuantizationConfig
             return
 
         if op in self.use_per_channel_weight_quant_ops:
-            if op in self.bit16_quant_ops:
-                return get_ptq_per_channel_weight_config(
-                    torch.uint16, self.per_channel_weight_dtype["16bit_act"]
+            if self.per_channel_quant_config is None:
+                if op in self.bit16_quant_ops:
+                    return get_ptq_per_channel_quant_config(
+                        act_dtype=torch.uint16,
+                        weight_dtype=self.per_channel_weight_dtype["16bit_act"],
+                    )
+                return get_ptq_per_channel_quant_config(
+                    act_dtype=torch.uint8,
+                    weight_dtype=self.per_channel_weight_dtype["8bit_act"],
                 )
-            return get_ptq_per_channel_weight_config(
-                weight_dtype=self.per_channel_weight_dtype["8bit_act"]
-            )
+            return self.per_channel_quant_config
 
         if op in self.bit8_quant_ops:
             return self.bit8_quant_config
 
@@ -20,6 +20,7 @@
     MinMaxObserver,
     MovingAverageMinMaxObserver,
     PerChannelMinMaxObserver,
+    UniformQuantizationObserverBase,
 )
 
 from torch.ao.quantization.quantizer import (
@@ -35,6 +36,107 @@
 from torch.fx import Node
 
 
+class ParamObserver(UniformQuantizationObserverBase):
+    """
+    Per-channel observer for static parameters that picks a symmetric
+    clipping range by line search.
+
+    On the first ``forward`` call, ``steps`` candidate thresholds between 0 and
+    the per-channel absolute maximum are tried; the threshold with the lowest
+    fake-quantization loss (MSE, or cosine embedding loss when ``use_mse`` is
+    False) is stored in ``min_val`` / ``max_val``. ``forward`` returns the
+    fake-quantized input.
+
+    Args:
+        ch_axis: channel axis of the observed tensor.
+        use_mse: use MSE as the search loss; otherwise cosine embedding loss.
+        steps: number of candidate thresholds evaluated by the line search.
+        Remaining arguments are forwarded to ``UniformQuantizationObserverBase``.
+    """
+
+    def __init__(
+        self,
+        ch_axis=0,
+        use_mse=True,
+        steps=100,
+        dtype=torch.int8,
+        qscheme=torch.per_channel_symmetric,
+        reduce_range=False,
+        quant_min=None,
+        quant_max=None,
+        factory_kwargs=None,
+        eps=torch.finfo(torch.float32).eps,  # noqa: B008
+        is_dynamic=False,
+        **kwargs,
+    ) -> None:
+        super().__init__(
+            dtype=dtype,
+            qscheme=qscheme,
+            reduce_range=reduce_range,
+            quant_min=quant_min,
+            quant_max=quant_max,
+            factory_kwargs=factory_kwargs,
+            eps=eps,
+            is_dynamic=is_dynamic,
+            **kwargs,
+        )
+
+        factory_kwargs = torch.nn.factory_kwargs(factory_kwargs)
+        # placeholders until the first forward call runs the line search
+        self.register_buffer("min_val", torch.tensor(float("inf"), **factory_kwargs))
+        self.register_buffer("max_val", torch.tensor(float("-inf"), **factory_kwargs))
+        self.ch_axis = ch_axis
+        self.use_mse = use_mse
+        self.steps = steps
+        self.calibrated = False
+
+    def to_ch_axis(self, x):
+        """Move ``ch_axis`` to the front and flatten the rest: (channels, -1)."""
+        axis_order = list(range(x.dim()))
+        # swap channel axis with axis 0
+        axis_order[self.ch_axis], axis_order[0] = 0, self.ch_axis
+        return torch.flatten(x.permute(axis_order), start_dim=1)
+
+    def mse(self, pred, expect):
+        """Per-channel mean squared error between ``pred`` and ``expect``."""
+        loss = (pred - expect).abs().pow(2)
+        return self.to_ch_axis(loss).mean(1)
+
+    def cosine(self, pred, expect):
+        """
+        Cosine embedding loss between ``pred`` and ``expect`` with one row
+        per channel. ``CosineEmbeddingLoss`` is used with its default
+        reduction, so the result is a single value rather than per channel.
+        """
+        # to_ch_axis already yields (channels, -1); reshaping by shape[0]
+        # would disagree with the channel count whenever ch_axis != 0
+        pred_n = self.to_ch_axis(pred)
+        expect_n = self.to_ch_axis(expect)
+        # target of ones: rows are expected to be similar; keep it on the
+        # same device as the compared tensors
+        target = torch.ones(pred_n.shape[0], device=pred_n.device)
+        return torch.nn.CosineEmbeddingLoss()(pred_n, expect_n, target)
+
+    def loss_fn(self, x, new_min, new_max):
+        """Loss between ``x`` and ``x`` fake-quantized with the given range."""
+        scale, offset = self._calculate_qparams(new_min, new_max)
+        x_q = torch.fake_quantize_per_channel_affine(
+            x,
+            scale.data,
+            offset.data.int(),
+            self.ch_axis,
+            self.quant_min,
+            self.quant_max,
+        )
+        return self.mse(x_q, x) if self.use_mse else self.cosine(x_q, x)
+
+    def line_search(self, x):
+        """
+        Return ``(min, max)`` per channel for the symmetric clipping range
+        with the smallest loss among ``steps`` evenly spaced candidates.
+        """
+        x_min, x_max = torch.aminmax(self.to_ch_axis(x), dim=1)
+        x_range = torch.max(x_min.abs(), x_max)
+        # large initial loss so that the first candidate is always accepted
+        optimal_loss = torch.zeros_like(x_min) + 1e9
+
+        # check which clip range could produce smallest loss
+        for i in range(1, self.steps + 1):
+            thres = x_range / self.steps * i
+            current_loss = self.loss_fn(x, -thres, thres)
+            improved = current_loss < optimal_loss
+            x_min = torch.where(improved, -thres, x_min)
+            x_max = torch.where(improved, thres, x_max)
+            optimal_loss = torch.min(current_loss, optimal_loss)
+
+        return x_min, x_max
+
+    def forward(self, x_orig):
+        """Calibrate once on the first call, then return fake-quantized input."""
+        # since params are static, one calibration is enough
+        if not self.calibrated:
+            x = x_orig.detach().to(self.min_val.dtype)
+            self.min_val, self.max_val = self.line_search(x)
+            self.calibrated = True
+
+        # return fake-quant result for saturating outliers
+        scale, zero_point = self._calculate_qparams(self.min_val, self.max_val)
+        return torch.fake_quantize_per_channel_affine(
+            x_orig,
+            scale.data,
+            zero_point.data.int(),
+            self.ch_axis,
+            self.quant_min,
+            self.quant_max,
+        )
+
+    @torch.jit.export
+    def calculate_qparams(self):
+        """Quantization parameters derived from the stored min / max."""
+        return self._calculate_qparams(self.min_val, self.max_val)
+
+
 @dataclass(eq=True, frozen=True)
 class QuantizationConfig:
     input_activation: Optional[QuantizationSpec]
@@ -235,7 +337,7 @@ def get_default_16bit_qnn_ptq_config(
     return quantization_config
 
 
-def get_ptq_per_channel_weight_config(
+def get_ptq_per_channel_quant_config(
     act_dtype=torch.uint8, weight_dtype=torch.int8
 ) -> QuantizationConfig:
     extra_args: Dict[str, Any] = {"eps": 2**-12}
@@ -585,7 +687,7 @@ def annotate_prelu(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_single_in_single_out(node, quantization_config)
 
 
-@register_annotator([torch.ops.aten.view.default])
+@register_annotator([torch.ops.aten.view.default, torch.ops.aten._unsafe_view.default])
 def annotate_view(node: Node, quantization_config: QuantizationConfig) -> None:
     annotate_in_out_obs_sharing_op(node, quantization_config)
     if not _is_annotated([node]):