From 80dc7e7a9c9565c51e31020139848fe560a10a50 Mon Sep 17 00:00:00 2001
From: Nitin Jain
Date: Thu, 28 Aug 2025 21:20:07 -0700
Subject: [PATCH 1/4] Add 16A8W support and test for add operation

Add 16A8W quantization support and a test for the add operation in the
ExecuTorch ARM backend. This follows the pattern established for linear
operations, extending int16 support to add operations.

Changes:
- Add INT16 dtype validation support in op_add.py
- Add test_add_tensor_16a8w_tosa_INT test function
- Enable test_add.py in test targets configuration

The 16A8W configuration uses 16-bit activations with 8-bit weights,
enabling higher precision for activations while maintaining weight
efficiency.

Differential Revision: [D80510463](https://our.internmc.facebook.com/intern/diff/D80510463/)

[ghstack-poisoned]
---
 backends/arm/operators/op_add.py  |   6 +-
 backends/arm/test/ops/test_add.py | 108 ++++++++++++++++++++++++++++++
 backends/arm/test/targets.bzl     |   1 +
 3 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/backends/arm/operators/op_add.py b/backends/arm/operators/op_add.py
index 7a022b54395..1cb53a8477d 100644
--- a/backends/arm/operators/op_add.py
+++ b/backends/arm/operators/op_add.py
@@ -47,10 +47,14 @@ def define_node(
         validate_num_inputs(self.target, inputs, 2)
         validate_same_dtype(self.target, [*inputs, output], ts)
 
+        valid_dtypes = []
+        if self.tosa_spec.support_integer():
+            valid_dtypes.extend([ts.DType.INT8, ts.DType.INT16, ts.DType.INT32])
+
         validate_valid_dtype(
             self.target,
             [*inputs, output],
-            [ts.DType.INT8, ts.DType.INT32],
+            valid_dtypes,
             output.tosa_spec,
         )
 
diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py
index 6bf3830d038..d5b16956011 100644
--- a/backends/arm/test/ops/test_add.py
+++ b/backends/arm/test/ops/test_add.py
@@ -10,6 +10,10 @@
 import pytest
 import torch
 from executorch.backends.arm.quantizer import arm_quantizer
+from executorch.backends.arm.quantizer.arm_quantizer import (
+    get_symmetric_a16w8_quantization_config,
+    TOSAQuantizer,
+)
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
@@ -216,3 +220,107 @@ def test_add_tensor_vgf_INT(test_data: input_t1):
         tosa_version="TOSA-1.0+INT",
     )
     pipeline.run()
+
+
+def get_symmetric_a16w8_add_quantizer(u55_config=False, per_channel_quantization=False):
+    tosa_version = conftest.get_option("tosa_version")
+    tosa_profiles = {
+        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
+    }
+
+    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
+    quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
+    )
+
+    return Quantize(
+        quantizer,
+        get_symmetric_a16w8_quantization_config(
+            is_per_channel=per_channel_quantization
+        ),
+    )
+
+
+@common.parametrize("test_data", Add.test_data)
+@pytest.mark.xfail(
+    reason="missing int16 add ops support; fails at TOSA reference model with Unsupported operation type or rank"
+)
+def test_add_tensor_16a8w_tosa_INT(test_data: input_t1):
+    """Test add operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
+    per_channel_quantization = False
+
+    pipeline = TosaPipelineINT[input_t1](
+        Add(),
+        test_data(),
+        aten_op,
+        exir_op=[],
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        tosa_extensions=["int16"],
+    )
+
+    pipeline.change_args(
+        "quantize",
+        get_symmetric_a16w8_add_quantizer(
+            per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Add.test_data)
+@common.XfailIfNoCorstone300
+@pytest.mark.xfail(
+    reason="missing int16 add ops support; fails at TOSA reference model with Unsupported operation type or rank"
+)
+def test_add_tensor_16a8w_u55_INT16(test_data: input_t1):
+    """Test add operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
+    per_channel_quantization = False
+
+    pipeline = EthosU55PipelineINT[input_t1](
+        Add(),
+        test_data(),
+        aten_op,
+        exir_op,
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        tosa_extensions=["int16"],
+        run_on_fvp=True,
+    )
+
+    pipeline.change_args(
+        "quantize",
+        get_symmetric_a16w8_add_quantizer(
+            u55_config=True, per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", Add.test_data)
+@common.XfailIfNoCorstone320
+@pytest.mark.xfail(
+    reason="missing int16 add ops support; fails at TOSA reference model with Unsupported operation type or rank"
+)
+def test_add_tensor_16a8w_u85_INT16(test_data: input_t1):
+    """Test add operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
+    per_channel_quantization = False
+
+    pipeline = EthosU85PipelineINT[input_t1](
+        Add(),
+        test_data(),
+        aten_op,
+        exir_op=[],
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        tosa_extensions=["int16"],
+        run_on_fvp=True,
+    )
+
+    pipeline.change_args(
+        "quantize",
+        get_symmetric_a16w8_add_quantizer(
+            u55_config=False, per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index acb27f13798..405f1bbf081 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -13,6 +13,7 @@ def define_arm_tests():
 
     # Operators
     test_files += [
+        "ops/test_add.py",
         "ops/test_avg_pool2d.py",
         "ops/test_linear.py",
         "ops/test_slice.py",

From 73519e8a70a316417502ff45a327b39fc9db2cd9 Mon Sep 17 00:00:00 2001
From: Nitin Jain
Date: Thu, 28 Aug 2025 22:10:48 -0700
Subject: [PATCH 2/4] Update on "Add 16A8W support and test for add operation"

Add 16A8W quantization support and a test for the add operation in the
ExecuTorch ARM backend. This follows the pattern established for linear
operations, extending int16 support to add operations.

Changes:
- Add INT16 dtype validation support in op_add.py
- Add test_add_tensor_16a8w_tosa_INT test function
- Enable test_add.py in test targets configuration

The 16A8W configuration uses 16-bit activations with 8-bit weights,
enabling higher precision for activations while maintaining weight
efficiency.
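
For intuition, here is a minimal sketch of the integer grids such a
symmetric 16A8W scheme implies. This is illustrative only;
`symmetric_qparams` is a made-up helper for this description, not part
of the arm_quantizer API:

```python
# Illustrative sketch: integer grids implied by symmetric 16A8W
# quantization -- 16-bit activations (16A) and 8-bit weights (8W).
import torch

def symmetric_qparams(x: torch.Tensor, n_bits: int):
    """Symmetric scale and clamp range for a signed n_bits integer grid."""
    qmax = 2 ** (n_bits - 1) - 1          # 32767 for int16, 127 for int8
    scale = x.abs().max().item() / qmax   # zero_point is 0 when symmetric
    return scale, -qmax - 1, qmax

acts, weights = torch.randn(4, 8), torch.randn(8, 8)
_, a_qmin, a_qmax = symmetric_qparams(acts, 16)     # activations: 16A
_, w_qmin, w_qmax = symmetric_qparams(weights, 8)   # weights: 8W
print(a_qmin, a_qmax)  # -32768 32767: much finer activation grid
print(w_qmin, w_qmax)  # -128 127: compact weight storage
```
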
Differential Revision: [D80510463](https://our.internmc.facebook.com/intern/diff/D80510463/)

[ghstack-poisoned]
---
 backends/arm/test/ops/test_add.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py
index d5b16956011..4f6bafa6384 100644
--- a/backends/arm/test/ops/test_add.py
+++ b/backends/arm/test/ops/test_add.py
@@ -222,7 +222,7 @@ def test_add_tensor_vgf_INT(test_data: input_t1):
     pipeline.run()
 
 
-def get_symmetric_a16w8_add_quantizer(u55_config=False, per_channel_quantization=False):
+def get_symmetric_a16w8_add_quantizer(per_channel_quantization=False):
     tosa_version = conftest.get_option("tosa_version")
     tosa_profiles = {
         "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
@@ -271,7 +271,7 @@ def test_add_tensor_16a8w_tosa_INT(test_data: input_t1):
 @common.parametrize("test_data", Add.test_data)
 @common.XfailIfNoCorstone300
 @pytest.mark.xfail(
-    reason="missing int16 add ops support; fails at TOSA reference model with Unsupported operation type or rank"
+    reason="Vela compilation fails with 'Invalid arguments' for int16 add operations"
 )
 def test_add_tensor_16a8w_u55_INT16(test_data: input_t1):
     """Test add operation with 16A8W quantization on U55 (16-bit activations, 8-bit weights)"""
@@ -284,14 +284,13 @@ def test_add_tensor_16a8w_u55_INT16(test_data: input_t1):
         exir_op,
         per_channel_quantization=per_channel_quantization,
         use_to_edge_transform_and_lower=True,
-        tosa_extensions=["int16"],
         run_on_fvp=True,
     )
 
     pipeline.change_args(
         "quantize",
         get_symmetric_a16w8_add_quantizer(
-            u55_config=True, per_channel_quantization=per_channel_quantization
+            per_channel_quantization=per_channel_quantization
         ),
     )
     pipeline.run()
@@ -300,7 +299,7 @@ def test_add_tensor_16a8w_u55_INT16(test_data: input_t1):
 @common.parametrize("test_data", Add.test_data)
 @common.XfailIfNoCorstone320
 @pytest.mark.xfail(
-    reason="missing int16 add ops support; fails at TOSA reference model with Unsupported operation type or rank"
+    reason="Vela compilation fails with 'Invalid arguments' for int16 add operations"
 )
 def test_add_tensor_16a8w_u85_INT16(test_data: input_t1):
     """Test add operation with 16A8W quantization on U85 (16-bit activations, 8-bit weights)"""
@@ -310,17 +309,16 @@ def test_add_tensor_16a8w_u85_INT16(test_data: input_t1):
         Add(),
         test_data(),
         aten_op,
-        exir_op=[],
+        exir_op,
         per_channel_quantization=per_channel_quantization,
         use_to_edge_transform_and_lower=True,
-        tosa_extensions=["int16"],
         run_on_fvp=True,
     )
 
     pipeline.change_args(
         "quantize",
         get_symmetric_a16w8_add_quantizer(
-            u55_config=False, per_channel_quantization=per_channel_quantization
+            per_channel_quantization=per_channel_quantization
         ),
     )
     pipeline.run()

From f7f835029ef02395f2624fd0ac31721000a1b997 Mon Sep 17 00:00:00 2001
From: Nitin Jain
Date: Thu, 28 Aug 2025 22:18:07 -0700
Subject: [PATCH 3/4] Update on "Add 16A8W support and test for add operation"

Add 16A8W quantization support and comprehensive tests for the add
operation in the ExecuTorch ARM backend targeting Ethos U55 and U85
NPUs. This follows the pattern established for linear operations,
extending int16 support to add operations with hardware-specific
testing.
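
At its core, the op_add.py change gates which dtypes the add visitor
accepts on the active TOSA spec. A self-contained sketch of that logic
follows; `DType` and `StubTosaSpec` here are stand-ins for the real
TOSA serializer types, which are not shown in the hunks:

```python
# Self-contained sketch of the dtype gating added in op_add.py; the
# real code uses the TOSA serializer's DType enum and a tosa_spec object.
from enum import Enum, auto

class DType(Enum):
    INT8 = auto()
    INT16 = auto()
    INT32 = auto()

class StubTosaSpec:
    def __init__(self, integer: bool) -> None:
        self.integer = integer

    def support_integer(self) -> bool:
        return self.integer

def valid_add_dtypes(tosa_spec: StubTosaSpec) -> list[DType]:
    valid_dtypes: list[DType] = []
    if tosa_spec.support_integer():
        # INT16 is the new entry that enables 16A8W activations.
        valid_dtypes.extend([DType.INT8, DType.INT16, DType.INT32])
    return valid_dtypes

assert DType.INT16 in valid_add_dtypes(StubTosaSpec(integer=True))
assert valid_add_dtypes(StubTosaSpec(integer=False)) == []
```
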
Changes:
- Add INT16 dtype validation support in op_add.py
- Add test_add_tensor_16a8w_tosa_INT test function with U55/U85 pipeline support
- Add U55 and U85 specific 16A8W tests with proper xfail decorators
- Fix U55/U85 test parameter usage (remove unsupported tosa_extensions, clean up quantizer function calls)
- Update xfail reasons to the consistent 'Vela compilation fails with Invalid arguments' pattern

exported-using-ghexport

Differential Revision: [D80510463](https://our.internmc.facebook.com/intern/diff/D80510463)

[ghstack-poisoned]

From 15b87c32a57f46e8018edc95e4040f7af26fade7 Mon Sep 17 00:00:00 2001
From: Nitin Jain
Date: Thu, 28 Aug 2025 22:42:43 -0700
Subject: [PATCH 4/4] Update on "Add 16A8W support and test for add operation"

Add 16A8W quantization support and comprehensive tests for the add
operation in the ExecuTorch ARM backend targeting Ethos U55 and U85
NPUs. This follows the pattern established for linear operations,
extending int16 support to add operations with hardware-specific
testing.

Changes:
- Add INT16 dtype validation support in op_add.py
- Add test_add_tensor_16a8w_tosa_INT test function with U55/U85 pipeline support
- Add U55 and U85 specific 16A8W tests with proper xfail decorators
- Fix U55/U85 test parameter usage (remove unsupported tosa_extensions, clean up quantizer function calls)
- Update xfail reasons to the consistent 'Vela compilation fails with Invalid arguments' pattern

Differential Revision: [D80510463](https://our.internmc.facebook.com/intern/diff/D80510463)

[ghstack-poisoned]
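
For reviewers who want to poke at the new config outside the test
pipeline, the quantizer wiring in test_add.py condenses to the sketch
below. It is adapted from the diff in patches 1 and 2; the
TosaSpecification import path is an assumption, since that import is
not shown in the hunks:

```python
# Condensed from get_symmetric_a16w8_add_quantizer in test_add.py:
# build a TOSA INT+int16 quantizer carrying the symmetric 16A8W config.
from executorch.backends.arm.quantizer.arm_quantizer import (
    get_symmetric_a16w8_quantization_config,
    TOSAQuantizer,
)
# Assumed import path; test_add.py pulls TosaSpecification in elsewhere.
from executorch.backends.arm.tosa_specification import TosaSpecification

spec = TosaSpecification.create_from_string("TOSA-1.0+INT+int16")
quantizer = TOSAQuantizer(spec)
quantizer.set_global(
    get_symmetric_a16w8_quantization_config(is_per_channel=False)
)
```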