From 388201c23b2da9b84a0edf09f1a34f18d1fb3973 Mon Sep 17 00:00:00 2001
From: Rahul Chandra
Date: Tue, 14 Oct 2025 13:06:47 -0700
Subject: [PATCH] Enable 16-bit activations in Cadence Quantizer For fully_connected and linear (#15010)

Summary:
# Context
We currently only support 8-bit activations for most operators. We would like to add generic 16-bit activation support for the following ops:
- quantized_fully_connected
- quantized_linear
- quantized_conv (all flavors)
- quantized_matmul

# This Diff
Here, we add support for `quantized_linear` and `quantized_fully_connected`. We need to do the following:
1. Allow 16-bit activations in `quantized_fully_connected_out.cpp` and `quantized_linear_out.cpp`.
2. Allow 16-bit activations in `ref_implementations.py`, so tests can run with 16-bit activations and validate that the quantization is correct.
3. Add a quantizer (`CadenceWith16BitLinearActivationsQuantizer`) to check that this works, and add a unit test.

Reviewed By: DrJessop, hsharma35

Differential Revision: D84284794
---
 backends/cadence/aot/quantizer/quantizer.py  | 13 +++++++++++
 backends/cadence/aot/ref_implementations.py  |  2 +-
 .../aot/tests/test_ref_implementations.py    | 22 ++++++++++++++++---
 3 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/backends/cadence/aot/quantizer/quantizer.py b/backends/cadence/aot/quantizer/quantizer.py
index d4af074c475..786b7d6cdf2 100644
--- a/backends/cadence/aot/quantizer/quantizer.py
+++ b/backends/cadence/aot/quantizer/quantizer.py
@@ -342,3 +342,16 @@ def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
             quantizers = get_cadence_default_quantizers()
         quantizers.append(CadenceAtenQuantizer(SoftmaxPattern(), qconfig_A16))
         super().__init__(quantizers)
+
+
+class CadenceWith16BitLinearActivationsQuantizer(CadenceQuantizer):
+    """
+    Quantizer including A16 fully_connected
+    """
+
+    def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None:
+        if quantizers is None:
+            quantizers = []
+        # Add 16-bit quantizers for LinearPattern
+        quantizers.append(CadenceAtenQuantizer(LinearPattern(), qconfig_A16))
+        super().__init__(quantizers)
diff --git a/backends/cadence/aot/ref_implementations.py b/backends/cadence/aot/ref_implementations.py
index ed9bb438a9e..b91f585fb16 100644
--- a/backends/cadence/aot/ref_implementations.py
+++ b/backends/cadence/aot/ref_implementations.py
@@ -261,7 +261,7 @@ def quantized_linear_common(
     src = src.view(-1, K)
 
     dtype = src.dtype
-    supported_dtypes = [torch.int8, torch.uint8, torch.int32]
+    supported_dtypes = [torch.int8, torch.uint8, torch.int16, torch.int32]
     if dtype not in supported_dtypes:
         raise ValueError(
             f"Unsupported dtype to quantize to {dtype}. Supported dtypes must be one of {supported_dtypes}"
diff --git a/backends/cadence/aot/tests/test_ref_implementations.py b/backends/cadence/aot/tests/test_ref_implementations.py
index 259752f3893..6aa091147c7 100644
--- a/backends/cadence/aot/tests/test_ref_implementations.py
+++ b/backends/cadence/aot/tests/test_ref_implementations.py
@@ -183,6 +183,8 @@ def test_quantized_add(
                 (False, torch.int8),
                 (True, torch.int8),
                 (True, torch.uint8),
+                (True, torch.int16),
+                (False, torch.int16),
             )
         ],
         # Test case 2: 1x3 input, 2x3 weight (2 output features)
@@ -207,6 +209,8 @@ def test_quantized_add(
             for (per_tensor, dtype) in (
                 (False, torch.int8),
                 (True, torch.int8),
+                (False, torch.int16),
+                (True, torch.int16),
             )
         ],
         *[
@@ -256,6 +260,8 @@ def test_quantized_add(
             for (per_tensor, dtype) in (
                 (False, torch.int8),
                 (True, torch.int8),
+                (False, torch.int16),
+                (True, torch.int16),
             )
         ],
         # Test case 4: Non-zero zero points
@@ -280,6 +286,8 @@ def test_quantized_add(
             for (per_tensor, dtype) in (
                 (False, torch.int8),
                 (True, torch.int8),
+                (False, torch.int16),
+                (True, torch.int16),
                 # (True, torch.uint8),
             )
         ],
@@ -302,7 +310,10 @@ def test_quantized_add(
                 False,
                 False,
             )
-            for dtype in (torch.int8,)
+            for dtype in (
+                torch.int8,
+                torch.int16,
+            )
         ],
         # Test case 6: Non-zero out_shift (shift=1)
         *[
@@ -325,7 +336,12 @@ def test_quantized_add(
                 False,
                 False,
             )
-            for (per_tensor, dtype) in ((False, torch.int8), (True, torch.int8))
+            for (per_tensor, dtype) in (
+                (False, torch.int8),
+                (True, torch.int8),
+                (False, torch.int16),
+                (True, torch.int16),
+            )
         ],
         *[
             (
@@ -348,7 +364,7 @@ def test_quantized_add(
                 transposed_matmul,
             )
             for (matmul, transposed_matmul) in ((True, False), (True, True))
-            for (per_tensor, dtype) in ((True, torch.int8),)
+            for (per_tensor, dtype) in ((True, torch.int8), (True, torch.int16))
         ],
         *[
             (