From 817c4596ae27e56ebd4214eebf7ad8dcf58c4301 Mon Sep 17 00:00:00 2001 From: Matthias Cremon Date: Mon, 14 Apr 2025 17:53:15 -0700 Subject: [PATCH] Remove layer norm from the default quantizer, add one that has it Summary: Layer norm does not perform well in quantized mode, and currently uses a split scheme (weights are quantized, activations are not). In most cases, it is actually much faster to keep it in fp32, so this diff removes it from the default quantizer. We add a CadenceWithLayerNormQuantizer for easy access to the previous behavior, which can be useful in some cases (mostly when quantizing layer norm helps extend the quantized liveness). Differential Revision: D72941790 --- backends/cadence/aot/quantizer/quantizer.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/backends/cadence/aot/quantizer/quantizer.py b/backends/cadence/aot/quantizer/quantizer.py index 69c9518166b..761b2bf8d31 100644 --- a/backends/cadence/aot/quantizer/quantizer.py +++ b/backends/cadence/aot/quantizer/quantizer.py @@ -193,7 +193,6 @@ def get_cadence_default_quantizers() -> List[Quantizer]: CadenceAtenQuantizer(BmmPattern(), qconfig_A8W8), CadenceAtenQuantizer(Conv1dPattern(), qconfig_A8W8sym), CadenceAtenQuantizer(Conv2dPattern(), qconfig_A8W8sym), - CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8W8), CadenceAtenQuantizer(LinearPattern(), qconfig_A8W8), CadenceAtenQuantizer(MatmulPattern(), qconfig_A8W8), CadenceAtenQuantizer(ReluPattern0(), qconfig_A8W8), @@ -236,9 +235,21 @@ def __init__( super().__init__([]) +class CadenceWithLayerNormQuantizer(CadenceQuantizer): + """ + Quantizer including layer norm + """ + + def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None: + if quantizers is None: + quantizers = get_cadence_default_quantizers() + quantizers.append(CadenceAtenQuantizer(LayerNormPattern(), qconfig_A8W8)) + super().__init__(quantizers) + + class CadenceWakeWordQuantizer(CadenceQuantizer): """ - Quantizer for 
WakeWord, including add + Quantizer for WakeWord, including add and cat """ def __init__(self, quantizers: Optional[list[Quantizer]] = None) -> None: