From 5059ee62b0fee539949f97737517102ff1e2b2f2 Mon Sep 17 00:00:00 2001
From: Nitin Jain <jainnitin@meta.com>
Date: Mon, 25 Aug 2025 13:39:54 -0700
Subject: [PATCH] Add 16A8W support and test for add operation

Add 16A8W quantization support and test for the add operation in ExecutorTorch ARM backend.

This follows the pattern established for linear operations, extending int16 support to add operations.

Changes:
- Add INT16 dtype validation support in op_add.py
- Add test_add_tensor_16a8w_tosa_INT test function
- Enable test_add.py in test targets configuration

The 16A8W configuration uses 16-bit activations with 8-bit weights, enabling higher precision for activations while maintaining weight efficiency.

Differential Revision: [D80510463](https://our.internmc.facebook.com/intern/diff/D80510463/)

[ghstack-poisoned]
---
 backends/arm/operators/op_add.py  |  2 +-
 backends/arm/test/ops/test_add.py | 47 +++++++++++++++++++++++++++++++
 backends/arm/test/targets.bzl     |  1 +
 3 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/backends/arm/operators/op_add.py b/backends/arm/operators/op_add.py
index 7a022b54395..2165adf49ed 100644
--- a/backends/arm/operators/op_add.py
+++ b/backends/arm/operators/op_add.py
@@ -50,7 +50,7 @@ def define_node(
         validate_valid_dtype(
             self.target,
             [*inputs, output],
-            [ts.DType.INT8, ts.DType.INT32],
+            [ts.DType.INT8, ts.DType.INT16, ts.DType.INT32],
             output.tosa_spec,
         )
 
diff --git a/backends/arm/test/ops/test_add.py b/backends/arm/test/ops/test_add.py
index 6bf3830d038..25dc8c27ce8 100644
--- a/backends/arm/test/ops/test_add.py
+++ b/backends/arm/test/ops/test_add.py
@@ -10,6 +10,10 @@
 import pytest
 import torch
 from executorch.backends.arm.quantizer import arm_quantizer
+from executorch.backends.arm.quantizer.arm_quantizer import (
+    get_symmetric_a16w8_quantization_config,
+    TOSAQuantizer,
+)
 from executorch.backends.arm.test import common, conftest
 from executorch.backends.arm.test.tester.test_pipeline import (
     EthosU55PipelineINT,
@@ -216,3 +220,46 @@ def test_add_tensor_vgf_INT(test_data: input_t1):
         tosa_version="TOSA-1.0+INT",
     )
     pipeline.run()
+
+
+def get_symmetric_a16w8_add_quantizer(u55_config=False, per_channel_quantization=False):
+    tosa_version = conftest.get_option("tosa_version")
+    tosa_profiles = {
+        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
+    }
+
+    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
+    quantizer.set_global(
+        get_symmetric_a16w8_quantization_config(is_per_channel=per_channel_quantization)
+    )
+
+    return Quantize(
+        quantizer,
+        get_symmetric_a16w8_quantization_config(
+            is_per_channel=per_channel_quantization
+        ),
+    )
+
+
+@common.parametrize("test_data", Add.test_data)
+def test_add_tensor_16a8w_tosa_INT(test_data: input_t1):
+    """Test add operation with 16A8W quantization (16-bit activations, 8-bit weights)"""
+    per_channel_quantization = False
+
+    pipeline = TosaPipelineINT[input_t1](
+        Add(),
+        test_data(),
+        aten_op,
+        exir_op=[],
+        per_channel_quantization=per_channel_quantization,
+        use_to_edge_transform_and_lower=True,
+        tosa_extensions=["int16"],
+    )
+
+    pipeline.change_args(
+        "quantize",
+        get_symmetric_a16w8_add_quantizer(
+            per_channel_quantization=per_channel_quantization
+        ),
+    )
+    pipeline.run()
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
index acb27f13798..405f1bbf081 100644
--- a/backends/arm/test/targets.bzl
+++ b/backends/arm/test/targets.bzl
@@ -13,6 +13,7 @@ def define_arm_tests():
 
     # Operators
     test_files += [
+        "ops/test_add.py",
         "ops/test_avg_pool2d.py",
         "ops/test_linear.py", 
         "ops/test_slice.py",