
Commit 03836c8

Author: Joey Tsai (committed)
[qat proto]

- Add QAT proto
- Add unit test test_qnn_backend_linear_qat
- Test command:

```bash
python backends/qualcomm/tests/test_qnn_delegate.py -H $HOST -s $DEVICE \
  -b $build-android/ -m "SM8650" -r $EXECUTORCH_ROOT \
  -k TestQNNQuantizedOperator.test_qnn_backend_linear_qat
```
1 parent 1f2b9aa commit 03836c8

File tree (4 files changed, +93 −1 lines)

- backends/qualcomm/quantizer/quantizer.py (+2)
- backends/qualcomm/quantizer/utils.py (+45)
- backends/qualcomm/tests/test_qnn_delegate.py (+20)
- backends/qualcomm/tests/utils.py (+26 −1)


backends/qualcomm/quantizer/quantizer.py

Lines changed: 2 additions & 0 deletions
```diff
@@ -28,6 +28,7 @@
     get_default_16bit_qnn_ptq_config,
     get_default_8bit_qnn_ptq_config,
     get_ptq_per_channel_quant_config,
+    get_default_8bit_qat_proto,
     OP_ANNOTATOR,
     QuantizationConfig,
 )
@@ -39,6 +40,7 @@
     "get_16a8w_qnn_ptq_config",
     "get_default_16bit_qnn_ptq_config",
     "get_default_8bit_qnn_ptq_config",
+    "get_default_8bit_qat_proto",
 ]


```

backends/qualcomm/quantizer/utils.py

Lines changed: 45 additions & 0 deletions
```diff
@@ -15,12 +15,19 @@
 from torch._ops import OpOverload
 from torch._subclasses import FakeTensor

+from torch.ao.quantization.fake_quantize import (
+    default_fake_quant,
+    default_per_channel_weight_fake_quant,
+    FusedMovingAvgObsFakeQuantize,
+)
+
 from torch.ao.quantization.observer import (
     FixedQParamsObserver,
     MinMaxObserver,
     MovingAverageMinMaxObserver,
     PerChannelMinMaxObserver,
     UniformQuantizationObserverBase,
+    MovingAveragePerChannelMinMaxObserver,
 )

 from torch.ao.quantization.quantizer import (
@@ -179,6 +186,44 @@ def _derive_bias_qparams_fn(
     )


+def get_default_8bit_qat_proto(act_symmetric: bool = False) -> QuantizationConfig:
+
+    act_quantization_spec = QuantizationSpec(
+        dtype=torch.uint8,
+        qscheme=(
+            torch.per_tensor_symmetric if act_symmetric else torch.per_tensor_affine
+        ),
+        ch_axis=0,
+        observer_or_fake_quant_ctr=default_fake_quant,
+    )
+
+    weight_quantization_spec = QuantizationSpec(
+        dtype=torch.int8,
+        quant_min=torch.iinfo(torch.int8).min + 1,
+        quant_max=torch.iinfo(torch.int8).max,
+        qscheme=torch.per_tensor_symmetric,
+        ch_axis=0,
+        observer_or_fake_quant_ctr=FusedMovingAvgObsFakeQuantize.with_args(observer=MovingAverageMinMaxObserver),
+    )
+
+    bias_quantization_spec = QuantizationSpec(
+        dtype=torch.int32,
+        quant_min=torch.iinfo(torch.int32).min,
+        quant_max=torch.iinfo(torch.int32).max,
+        qscheme=torch.per_tensor_symmetric,
+        observer_or_fake_quant_ctr=default_fake_quant,
+    )
+
+    quantization_config = QuantizationConfig(
+        input_activation=act_quantization_spec,
+        output_activation=act_quantization_spec,
+        weight=weight_quantization_spec,
+        bias=bias_quantization_spec,
+    )
+
+    return quantization_config
+
+
 def get_default_8bit_qnn_ptq_config(
     act_symmetric: bool = False, act_observer=MovingAverageMinMaxObserver
 ) -> QuantizationConfig:
```
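
For orientation (not part of the diff): the weight spec above replaces the PTQ observer with `FusedMovingAvgObsFakeQuantize`, which tracks a moving-average min/max and fake-quantizes the tensor in the forward pass, so quantization error is visible to the optimizer during QAT while gradients still flow to the float weights. A minimal standalone sketch of that behavior; the toy tensor and hand-picked qparams below are illustrative assumptions, not values read out of the config:

```python
import torch
from torch.ao.quantization.fake_quantize import FusedMovingAvgObsFakeQuantize
from torch.ao.quantization.observer import MovingAverageMinMaxObserver

# Symmetric int8 fake-quant roughly mirroring the weight spec (quant_min = -127).
fake_quant = FusedMovingAvgObsFakeQuantize.with_args(
    observer=MovingAverageMinMaxObserver,
    dtype=torch.qint8,
    qscheme=torch.per_tensor_symmetric,
    quant_min=-127,
    quant_max=127,
)()

weight = torch.randn(8, 4, requires_grad=True)  # hypothetical toy weight tensor
fq_weight = fake_quant(weight)   # quantize -> dequantize; output stays float
fq_weight.sum().backward()       # straight-through estimator passes gradients to `weight`
print(weight.grad.shape)         # torch.Size([8, 4])
```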

backends/qualcomm/tests/test_qnn_delegate.py

Lines changed: 20 additions & 0 deletions
```diff
@@ -1045,6 +1045,26 @@ def test_qnn_backend_linear(self):
         module = self.get_qdq_module(module, sample_input)
         self.lower_module_and_test_output(module, sample_input)

+    def test_qnn_backend_linear_qat(self):
+        """
+        Prototype to test QAT model.
+        """
+        module = Linear()  # noqa: F405
+        sample_input = (torch.randn([3, 4]),)
+
+        module = self.get_prepared_qat_module(module, sample_input)
+
+        optimizer = torch.optim.SGD(module.parameters(), lr=0.1)
+        criterion = torch.nn.CrossEntropyLoss()
+        output = module(*sample_input)
+        loss = criterion(output, module(*sample_input))
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+        module = torch.ao.quantization.quantize_pt2e.convert_pt2e(module)
+        self.lower_module_and_test_output(module, sample_input)
+
     def test_qnn_backend_log_softmax(self):
         module = LogSoftmax()  # noqa: F405
         sample_input = (torch.randn([1, 4, 8, 8]),)
```
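
A side note on the loss above: the test feeds a second forward pass to `CrossEntropyLoss` as the target, which is enough to drive `backward()` through the fake-quant nodes in this prototype. With real data the step would conventionally use integer class labels; a minimal illustrative sketch (the logits and labels below are placeholders, not part of the test):

```python
import torch

criterion = torch.nn.CrossEntropyLoss()
logits = torch.randn(3, 2, requires_grad=True)  # stand-in for module(*sample_input)
labels = torch.randint(0, 2, (3,))              # hypothetical integer class labels
loss = criterion(logits, labels)
loss.backward()
```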

backends/qualcomm/tests/utils.py

Lines changed: 26 additions & 1 deletion
```diff
@@ -20,6 +20,7 @@
 from executorch.backends.qualcomm.quantizer.quantizer import (
     get_16a4w_qnn_ptq_config,
     get_default_16bit_qnn_ptq_config,
+    get_default_8bit_qat_proto,
     QnnQuantizer,
     QuantDtype,
 )
@@ -44,7 +45,7 @@
 from executorch.exir.pass_base import ExportPass
 from executorch.exir.passes.memory_planning_pass import MemoryPlanningPass
 from executorch.exir.program import ExecutorchProgram, ExecutorchProgramManager
-from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e
+from torch.ao.quantization.quantize_pt2e import convert_pt2e, prepare_pt2e, prepare_qat_pt2e


 def generate_context_binary(
@@ -426,6 +427,30 @@ def get_qdq_module(
         self.assertTrue(nodes.intersection(q_and_dq))
         return quantized_module

+    def get_prepared_qat_module(
+        self,
+        module: torch.nn.Module,
+        inputs: Tuple[torch.Tensor],
+        is_conv_per_channel: Optional[bool] = True,
+        is_linear_per_channel: Optional[bool] = False,
+        custom_quant_annotations: Tuple[Callable] = (),
+        quant_dtype: QuantDtype = QuantDtype.use_8a8w,
+    ) -> torch.fx.GraphModule:
+        m = torch.export.export_for_training(module, inputs).module()
+
+        quantizer = QnnQuantizer()
+        quantizer.add_custom_quant_annotations(custom_quant_annotations)
+        quantizer.set_per_channel_conv_quant(is_conv_per_channel)
+        quantizer.set_per_channel_linear_quant(is_linear_per_channel)
+
+        if quant_dtype == QuantDtype.use_8a8w:
+            quantizer.set_bit8_op_quant_config(get_default_8bit_qat_proto())
+        else:
+            raise RuntimeError("Should not be here")
+
+        prepared = prepare_qat_pt2e(m, quantizer)
+        return torch.ao.quantization.move_exported_model_to_train(prepared)
+
     def split_graph(self, graph_module: torch.fx.GraphModule, division: int):
         class SplitGraph(ExportPass):
             """
```
