pytorch
diff --git a/‎.ci/docker/requirements-ci.txt‎
Lines changed: 4 additions & 7 deletions b/‎.ci/docker/requirements-ci.txt‎
Lines changed: 4 additions & 7 deletions
diff --git a/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/android-perf.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/apple/coreml/README.md‎
Lines changed: 3 additions & 3 deletions b/‎backends/apple/coreml/README.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎backends/apple/coreml/scripts/install_requirements.sh‎
Lines changed: 1 addition & 5 deletions b/‎backends/apple/coreml/scripts/install_requirements.sh‎
Lines changed: 1 addition & 5 deletions
diff --git a/‎backends/arm/_passes/size_adjust_conv2d_pass.py‎
Lines changed: 51 additions & 52 deletions b/‎backends/arm/_passes/size_adjust_conv2d_pass.py‎
Lines changed: 51 additions & 52 deletions
diff --git a/‎backends/arm/process_node.py‎
Lines changed: 8 additions & 58 deletions b/‎backends/arm/process_node.py‎
Lines changed: 8 additions & 58 deletions
diff --git a/‎backends/arm/quantizer/quantization_annotator.py‎
Lines changed: 2 additions & 2 deletions b/‎backends/arm/quantizer/quantization_annotator.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎backends/arm/quantizer/quantization_config.py‎
Lines changed: 38 additions & 2 deletions b/‎backends/arm/quantizer/quantization_config.py‎
Lines changed: 38 additions & 2 deletions
@@ -1,17 +1,14 @@
 mpmath==1.3.0
-numpy==1.21.3; python_version == '3.10'
-numpy==1.23.2; python_version == '3.11'
-numpy; python_version >= '3.12'
+numpy==2.0.0; python_version >= '3.10'
 PyYAML==6.0.1
 ruamel.yaml==0.17.32
 sympy==1.12
 timm==0.6.13
 tomli==2.0.1
 torchsr==1.0.4
-transformers==4.38.0
+transformers==4.47.1
 zstd==1.5.5.1
-pandas==2.0.3; python_version == '3.10'
-pandas; python_version >= '3.11'
+pandas==2.2.2; python_version >= '3.10'
 pytest==7.2.0
 pytest-cov==4.1.0
 expecttest==0.1.6
@@ -24,7 +21,7 @@ sphinx-gallery==0.14.0
 breathe==4.34.0
 exhale==0.2.3
 docutils==0.16
-matplotlib==3.7.2
+matplotlib==3.9.4
 # PyTorch Theme
 -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
 myst-parser==0.18.1
 
@@ -260,7 +260,7 @@ jobs:
                       --output_name="${OUT_ET_MODEL_NAME}.pte"
                     ls -lh "${OUT_ET_MODEL_NAME}.pte"
                 elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
-                    export QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728
+                    export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
                     export LD_LIBRARY_PATH=$QNN_SDK_ROOT/lib/x86_64-linux-clang/
                     export PYTHONPATH=$(pwd)/..
 
@@ -347,7 +347,7 @@ jobs:
         PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
 
         export ANDROID_ABIS="arm64-v8a"
-        PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.25.0.240728 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
+        PYTHON_EXECUTABLE=python EXECUTORCH_BUILD_QNN=ON QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029 bash build/build_android_llm_demo.sh ${ARTIFACTS_DIR_NAME}
 
   # Let's see how expensive this job is, we might want to tone it down by running it periodically
   benchmark-on-device:
 
@@ -93,14 +93,14 @@ class Model(torch.nn.Module):
 source_model = Model()
 example_inputs = (torch.randn((1, 3, 256, 256)), )
 
-pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()
+pre_autograd_aten_dialect = export_for_training(source_model, example_inputs).module()
 
 quantization_config = LinearQuantizerConfig.from_dict(
     {
         "global_config": {
             "quantization_scheme": QuantizationScheme.symmetric,
-            "activation_dtype": torch.uint8,
-            "weight_dtype": torch.int8,
+            "activation_dtype": torch.quint8,
+            "weight_dtype": torch.qint8,
             "weight_per_channel": True,
         }
     }
 
@@ -47,11 +47,7 @@ cmake --build "$COREMLTOOLS_DIR_PATH/build" --parallel
 
 echo "${green}ExecuTorch: Installing coremltools."
 pip install "$COREMLTOOLS_DIR_PATH"
-# CoreMLTools have started supporting numpy 2.0,
-# but ExecuTorch example model test env is still using older transformers,
-# so for now we will need to downgrade numpy to 1.x
-# TODO: Remove this numpy downgrade once later transformers starts to be used
-pip install numpy==1.26.4
+
 STATUS=$?
 if [ $STATUS -ne 0 ]; then
     echo "${red}ExecuTorch: Failed to install coremltools."
 
@@ -1,73 +1,74 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
 # pyre-unsafe
 
-from typing import cast, Optional
+from typing import cast
 
 import torch.fx
+from executorch.backends.arm._passes.arm_pass_utils import create_node
 from executorch.exir.dialects._ops import ops as exir_ops
 from executorch.exir.pass_base import ExportPass, PassResult
-from torch._ops import OpOverload
 
 
 def conv_remainder(input_length, pad, dilation, weight, stride):
     """
-    Returns the size
+    Returns the number of input elements left over (the remainder) along one
+    dimension, given the padding, dilation, stride, and kernel size.
     """
     return (input_length + 2 * pad - dilation * (weight - 1) - 1) % stride
 
 
-def insert_q_dq_pair(
-    graph: torch.fx.Graph,
-    anchor: torch.fx.Node,
-    q_params: tuple,
-):
-    with graph.inserting_after(anchor):
-        q = create_node(
-            graph=graph,
-            op_target=exir_ops.edge.quantized_decomposed.quantize_per_tensor.default,
-            args=(),  # We add the argument last
-        )
-        q.meta = anchor.meta
-
-    with graph.inserting_after(q):
-        dq = create_node(
-            graph=graph,
-            op_target=exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default,
-            args=(q,) + q_params,
-        )
-        dq.meta = q.meta
-
-    anchor.replace_all_uses_with(dq)
-    # We add this last so the replace all uses above does not replace the quantized
-    # node's first use
-    q.args = (anchor,) + q_params
-    return dq
-
-
-def create_node(
-    graph: torch.fx.Graph,
-    op_target: OpOverload,
-    args: tuple = (),
-    kwargs: Optional[dict] = None,
-):
-    return graph.create_node(
-        "call_function",
-        op_target,
-        args=args,
-        kwargs=kwargs or {},
-    )
-
-
 class SizeAdjustConv2DPass(ExportPass):
     """
-    Adjust the convolution input size to match perfectly with the
-    weight size, padding, stride and dilation parameters.
-    This is done by inserting a slice op to remove the uneven end of the input.
+    Adjust the convolution input size to match the kernel size, padding, stride,
+    and dilation parameters. PyTorch allows the input and kernel shape to not
+    "match", in which case the remaining rows/columns are truncated. However,
+    matching the size is a requirement in the TOSA specification. In case the
+    input and kernel shape do not match, the following is done to meet the
+    specification:
+
+      1) The padding is truncated (done in the node visitor)
+      2) (if necessary) The input is truncated (done in this pass).
+
+    A simple example would be a 2x2 kernel (no padding, stride=2) and a 5x5
+    input:
+
+    ┌───┬───┬───┬───┬───┐    ┌───┬───┬───┬───┬───┐    ┌───┬───┬───┬───┬───┐
+    │ X │ X │   │   │   │    │   │   │ X │ X │   │    │   │   │   │   │ - │
+    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤
+    │ X │ X │   │   │   │    │   │   │ X │ X │   │    │   │   │   │   │ - │
+    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤
+    │   │   │   │   │   │ -> │   │   │   │   │   │ -> │ X │ X │   │   │   │ ->
+    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤
+    │   │   │   │   │   │    │   │   │   │   │   │    │ X │ X │   │   │   │
+    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤
+    │   │   │   │   │   │    │   │   │   │   │   │    │   │   │   │   │   │
+    └───┴───┴───┴───┴───┘    └───┴───┴───┴───┴───┘    └───┴───┴───┴───┴───┘
+         First pass               Second pass              Third pass
+
+    ┌───┬───┬───┬───┬───┐    ┌───┬───┬───┬───┬───┐
+    │   │   │   │   │   │    │   │   │   │   │ - │
+    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤
+    │   │   │   │   │   │    │   │   │   │   │ - │
+    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤
+    │   │   │ X │ X │   │ -> │   │   │   │   │ - │
+    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤
+    │   │   │ X │ X │   │    │   │   │   │   │ - │
+    ├───┼───┼───┼───┼───┤    ├───┼───┼───┼───┼───┤
+    │   │   │   │   │   │    │ - │ - │ - │ - │ - │
+    └───┴───┴───┴───┴───┘    └───┴───┴───┴───┴───┘
+         Fourth pass            Unvisited cells
+
+    Cells that are never visited are marked with `-` and are never considered
+    when the kernel traverses over the input, hence they can be removed.
+
+    To match the shape of the kernel (and all parameters) with the input, a
+    slice op is inserted to remove the remaining edges (rows and columns) of the
+    input.
     """
 
     conv2d_op = exir_ops.edge.aten.convolution.default
@@ -109,9 +110,7 @@ def call(self, graph_module: torch.fx.GraphModule):
             with graph_module.graph.inserting_before(node):
                 last_node = cast(torch.fx.Node, input_node)
                 for args in slice_args:
-                    slice_node = graph.create_node(
-                        "call_function", self.slice_op, (last_node,) + args
-                    )
+                    slice_node = create_node(graph, self.slice_op, (last_node,) + args)
                     last_node = slice_node
                 conv_node.replace_input_with(cast(torch.fx.Node, input_node), last_node)
                 modified_graph = True
 
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -11,11 +11,6 @@
 import serializer.tosa_serializer as ts
 import torch
 import torch.fx
-
-# pyre-fixme[21]: 'Could not find a module corresponding to import `executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass`.'
-from executorch.backends.arm._passes.fold_qdq_with_annotated_qparams_pass import (
-    get_input_qparams,
-)
 from executorch.backends.arm.operators.node_visitor import NodeVisitor
 from executorch.backends.arm.tosa_mapping import map_dtype, TosaArg
 from executorch.backends.arm.tosa_quant_utils import (
@@ -24,11 +19,7 @@
     is_node_quantized,
 )
 from executorch.backends.arm.tosa_specification import TosaSpecification
-from executorch.backends.arm.tosa_utils import (
-    getNodeArgs,
-    is_bias_node_for_quantized_conv,
-    tosa_shape,
-)
+from executorch.backends.arm.tosa_utils import getNodeArgs, tosa_shape
 from torch.export.exported_program import ExportedProgram
 
 
@@ -99,41 +90,6 @@ def process_inputs(
     tosa_graph.addInputTensor(tensor)
 
 
-def process_quantized_bias(
-    node: torch.fx.Node,
-    tosa_graph: ts.TosaSerializer,
-    parameter_values,
-):
-    """
-    Serialize bias node that needs to be quantized.
-    """
-    consumer_node = list(node.users)[0]
-    (
-        input_node,
-        weight_node,
-        _,
-    ) = consumer_node.all_input_nodes
-
-    input_qargs = get_input_qparams(  # pyre-ignore[16]: Module `executorch.backends.arm` has no attribute `_passes`.
-        consumer_node
-    )
-
-    input_node_scale = input_qargs[0].scale
-    weight_node_scale = input_qargs[1].scale
-    bias_values_quantized = (
-        (parameter_values / (input_node_scale * weight_node_scale))
-        .round()
-        .astype(np.int32)
-    )
-
-    tosa_graph.addConst(
-        bias_values_quantized.shape,
-        ts.DType.INT32,
-        bias_values_quantized,
-        name=node.name,
-    )
-
-
 def process_inputs_to_parameters(
     node: torch.fx.Node,
     tosa_graph: ts.TosaSerializer,
@@ -148,20 +104,14 @@ def process_inputs_to_parameters(
     assert isinstance(parameter_data, torch.Tensor), "Expect Attr to be tensor"
     parameter_values = parameter_data.detach().numpy()
 
-    if is_bias_node_for_quantized_conv(node):
-        # BI bias
-        assert tosa_spec.support_integer(), f"{tosa_spec} doesnt't support integer"
-        process_quantized_bias(node, tosa_graph, parameter_values)
-    else:
-        # MI weights or bias
-        if inputs[0].dtype == torch.float32:
-            assert tosa_spec.support_float(), f"{tosa_spec} doesn't support float"
+    if inputs[0].dtype == torch.float32:
+        assert tosa_spec.support_float(), f"{tosa_spec} doesn't support float"
 
-        parameter_values = np.transpose(parameter_values, inputs[0].dim_order)
+    parameter_values = np.transpose(parameter_values, inputs[0].dim_order)
 
-        tosa_graph.addConst(
-            parameter_values.shape, inputs[0].dtype, parameter_values, name=node.name
-        )
+    tosa_graph.addConst(
+        parameter_values.shape, inputs[0].dtype, parameter_values, name=node.name
+    )
 
 
 def process_inputs_to_buffers(
 
@@ -1,4 +1,4 @@
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -196,7 +196,7 @@ def get_quant_properties(  # noqa: C901
     input_act_qspec = quantization_config.get_input_act_qspec()
     weight_qspec = quantization_config.get_weight_qspec()
     output_act_qspec = quantization_config.get_output_act_qspec()
-    bias_qspec = quantization_config.get_bias_qspec()
+    bias_qspec = quantization_config.get_bias_qspec(node)
 
     quant_properties = _OpQuantProperties()
 
 
@@ -1,5 +1,5 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
-# Copyright 2024 Arm Limited and/or its affiliates.
+# Copyright 2024-2025 Arm Limited and/or its affiliates.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
@@ -9,8 +9,10 @@
 from dataclasses import dataclass
 
 import torch
+from torch.ao.quantization import ObserverOrFakeQuantize
 
 from torch.ao.quantization.quantizer import (
+    DerivedQuantizationSpec,
     FixedQParamsQuantizationSpec,
     QuantizationSpec,
 )
@@ -53,8 +55,42 @@ def get_weight_qspec(self) -> QuantizationSpec | None:
         ], f"Unsupported quantization_spec {self.weight} for weight"
         return self.weight
 
-    def get_bias_qspec(self) -> QuantizationSpec | None:
+    def get_bias_qspec(self, node: torch.fx.Node) -> QuantizationSpec | None:
         """Returns QuantizationSpec 'bias' after asserting that bias.dtype is torch.float."""
+
+        def _derive_qparams_fn(
+            obs_or_fqs: list[ObserverOrFakeQuantize],
+        ) -> tuple[torch.Tensor, torch.Tensor]:
+            assert (
+                len(obs_or_fqs) == 2
+            ), "Expecting two obs/fqs, one for activation and one for weight, got: {}".format(
+                len(obs_or_fqs)
+            )
+            act_obs_or_fq = obs_or_fqs[0]
+            weight_obs_or_fq = obs_or_fqs[1]
+            act_scale, act_zp = act_obs_or_fq.calculate_qparams()
+            weight_scale, weight_zp = weight_obs_or_fq.calculate_qparams()
+            return torch.tensor([act_scale * weight_scale]).to(
+                torch.float32
+            ), torch.tensor([0]).to(torch.int32)
+
+        if node.target in [
+            torch.ops.aten.conv1d.default,
+            torch.ops.aten.conv2d.default,
+            torch.ops.aten.linear.default,
+        ]:
+            input_act = node.args[0]
+            weight = node.args[1]
+            quantization_spec = DerivedQuantizationSpec(
+                derived_from=[(input_act, node), (weight, node)],
+                derive_qparams_fn=_derive_qparams_fn,
+                dtype=torch.int32,
+                quant_min=torch.iinfo(torch.int32).min,
+                quant_max=torch.iinfo(torch.int32).max - 1,
+                qscheme=torch.per_tensor_symmetric,
+            )
+            return quantization_spec
+
         if self.bias is None:
             return None
         assert (
Original file line number	Diff line number	Diff line change
`@@ -93,14 +93,14 @@ class Model(torch.nn.Module):`
`93`	`93`	`source_model = Model()`
`94`	`94`	`example_inputs = (torch.randn((1, 3, 256, 256)), )`
`95`	`95`
`96`		`-pre_autograd_aten_dialect = export_for_training(model, example_inputs).module()`
	`96`	`+pre_autograd_aten_dialect = export_for_training(source_model, example_inputs).module()`
`97`	`97`
`98`	`98`	`quantization_config = LinearQuantizerConfig.from_dict(`
`99`	`99`	`{`
`100`	`100`	`"global_config": {`
`101`	`101`	`"quantization_scheme": QuantizationScheme.symmetric,`
`102`		`- "activation_dtype": torch.uint8,`
`103`		`- "weight_dtype": torch.int8,`
	`102`	`+ "activation_dtype": torch.quint8,`
	`103`	`+ "weight_dtype": torch.qint8,`
`104`	`104`	`"weight_per_channel": True,`
`105`	`105`	`}`
`106`	`106`	`}`