Skip to content

Commit 4c2d4be

Browse files
committed
Add 16A8W support and test for add operation
Add 16A8W quantization support and test for the add operation in ExecutorTorch ARM backend. This follows the pattern established for linear operations, extending int16 support to add operations. Changes: - Add INT16 dtype validation support in op_add.py - Add test_add_tensor_16a8w_tosa_INT test function - Enable test_add.py in test targets configuration The 16A8W configuration uses 16-bit activations with 8-bit weights, enabling higher precision for activations while maintaining weight efficiency. Differential Revision: [D80510463](https://our.internmc.facebook.com/intern/diff/D80510463/) ghstack-source-id: 305897355 Pull Request resolved: #13789
1 parent 6208340 commit 4c2d4be

File tree

3 files changed

+114
-1
lines changed

3 files changed

+114
-1
lines changed

backends/arm/operators/op_add.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,14 @@ def define_node(
4747

4848
validate_num_inputs(self.target, inputs, 2)
4949
validate_same_dtype(self.target, [*inputs, output], ts)
50+
valid_dtypes = []
51+
if self.tosa_spec.support_integer():
52+
valid_dtypes.extend([ts.DType.INT8, ts.DType.INT16, ts.DType.INT32])
53+
5054
validate_valid_dtype(
5155
self.target,
5256
[*inputs, output],
53-
[ts.DType.INT8, ts.DType.INT32],
57+
valid_dtypes,
5458
output.tosa_spec,
5559
)
5660

backends/arm/test/ops/test_add.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,10 @@
1010
import pytest
1111
import torch
1212
from executorch.backends.arm.quantizer import arm_quantizer
13+
from executorch.backends.arm.quantizer.arm_quantizer import (
14+
get_symmetric_a16w8_quantization_config,
15+
TOSAQuantizer,
16+
)
1317
from executorch.backends.arm.test import common, conftest
1418
from executorch.backends.arm.test.tester.test_pipeline import (
1519
EthosU55PipelineINT,
@@ -216,3 +220,107 @@ def test_add_tensor_vgf_INT(test_data: input_t1):
216220
tosa_version="TOSA-1.0+INT",
217221
)
218222
pipeline.run()
223+
224+
225+
def get_symmetric_a16w8_add_quantizer(u55_config=False, per_channel_quantization=False):
    """Build a ``Quantize`` stage for 16A8W (16-bit activation, 8-bit weight)
    symmetric quantization of the add operation.

    Args:
        u55_config: Accepted for signature parity with sibling op helpers;
            currently unused — the same TOSA int16 profile is selected
            either way.
        per_channel_quantization: Forwarded to
            ``get_symmetric_a16w8_quantization_config``.

    Returns:
        A ``Quantize`` pipeline stage wrapping a ``TOSAQuantizer`` that has
        the a16w8 symmetric config applied globally.
    """
    tosa_version = conftest.get_option("tosa_version")
    tosa_profiles = {
        "1.0": TosaSpecification.create_from_string("TOSA-1.0+INT+int16"),
    }

    # Build the config once and reuse it for both the quantizer's global
    # setting and the Quantize stage (previously it was constructed twice
    # with identical arguments).
    quantization_config = get_symmetric_a16w8_quantization_config(
        is_per_channel=per_channel_quantization
    )

    quantizer = TOSAQuantizer(tosa_profiles[tosa_version])
    quantizer.set_global(quantization_config)

    return Quantize(quantizer, quantization_config)
242+
243+
244+
@common.parametrize("test_data", Add.test_data)
@pytest.mark.xfail(
    reason="missing int16 add ops support; fails at TOSA reference model with Unsupported operation type or rank"
)
def test_add_tensor_16a8w_tosa_INT(test_data: input_t1):
    """Run the add op through the TOSA INT pipeline with 16A8W quantization
    (16-bit activations, 8-bit weights)."""
    use_per_channel = False

    pipeline = TosaPipelineINT[input_t1](
        Add(),
        test_data(),
        aten_op,
        exir_op=[],
        per_channel_quantization=use_per_channel,
        use_to_edge_transform_and_lower=True,
        tosa_extensions=["int16"],
    )

    # Swap the pipeline's default quantize stage for the a16w8 quantizer.
    quantize_stage = get_symmetric_a16w8_add_quantizer(
        per_channel_quantization=use_per_channel
    )
    pipeline.change_args("quantize", quantize_stage)
    pipeline.run()
269+
270+
271+
@common.parametrize("test_data", Add.test_data)
@common.XfailIfNoCorstone300
@pytest.mark.xfail(
    reason="missing int16 add ops support; fails at TOSA reference model with Unsupported operation type or rank"
)
def test_add_tensor_16a8w_u55_INT16(test_data: input_t1):
    """Run the add op on the Ethos-U55 INT pipeline (on FVP) with 16A8W
    quantization (16-bit activations, 8-bit weights)."""
    use_per_channel = False

    pipeline = EthosU55PipelineINT[input_t1](
        Add(),
        test_data(),
        aten_op,
        exir_op,
        per_channel_quantization=use_per_channel,
        use_to_edge_transform_and_lower=True,
        tosa_extensions=["int16"],
        run_on_fvp=True,
    )

    # Swap the pipeline's default quantize stage for the a16w8 quantizer,
    # flagged for the U55 configuration.
    quantize_stage = get_symmetric_a16w8_add_quantizer(
        u55_config=True, per_channel_quantization=use_per_channel
    )
    pipeline.change_args("quantize", quantize_stage)
    pipeline.run()
298+
299+
300+
@common.parametrize("test_data", Add.test_data)
@common.XfailIfNoCorstone320
@pytest.mark.xfail(
    reason="missing int16 add ops support; fails at TOSA reference model with Unsupported operation type or rank"
)
def test_add_tensor_16a8w_u85_INT16(test_data: input_t1):
    """Run the add op on the Ethos-U85 INT pipeline (on FVP) with 16A8W
    quantization (16-bit activations, 8-bit weights)."""
    use_per_channel = False

    pipeline = EthosU85PipelineINT[input_t1](
        Add(),
        test_data(),
        aten_op,
        exir_op=[],
        per_channel_quantization=use_per_channel,
        use_to_edge_transform_and_lower=True,
        tosa_extensions=["int16"],
        run_on_fvp=True,
    )

    # Swap the pipeline's default quantize stage for the a16w8 quantizer
    # (u55_config=False is the default; kept explicit as in the original).
    quantize_stage = get_symmetric_a16w8_add_quantizer(
        u55_config=False, per_channel_quantization=use_per_channel
    )
    pipeline.change_args("quantize", quantize_stage)
    pipeline.run()

backends/arm/test/targets.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def define_arm_tests():
1313

1414
# Operators
1515
test_files += [
16+
"ops/test_add.py",
1617
"ops/test_avg_pool2d.py",
1718
"ops/test_linear.py",
1819
"ops/test_slice.py",

0 commit comments

Comments
 (0)