NXP backend: Quantize input placeholders in NeutronQuantizer

skywall · robert-kalmar · commit 320d28096f37 · 2025-08-01T13:51:55.000+02:00
diff --git a/backends/nxp/quantizer/neutron_quantizer.py b/backends/nxp/quantizer/neutron_quantizer.py
@@ -41,6 +41,7 @@
     no_outside_users,
 )
 from torch import fx
+from torch.ao.quantization.quantizer.utils import _annotate_output_qspec
 from torchao.quantization.pt2e import HistogramObserver, MinMaxObserver
 from torchao.quantization.pt2e.quantizer import (
     ComposableQuantizer,
@@ -237,6 +238,8 @@ def transform_for_annotation(
         return pass_runner(model).graph_module
 
     def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
+        self._annotate_inputs(model)
+
         nodes = list(model.graph.nodes)
         for node in nodes:
             if (
@@ -252,5 +255,25 @@ def annotate(self, model: torch.fx.GraphModule) -> torch.fx.GraphModule:
 
         return model
 
+    def _is_input_annotated(self, node: fx.Node) -> bool:
+        """Tell whether `node` already holds an annotation flagged as annotated."""
+        if "quantization_annotation" not in node.meta:
+            return False
+        return node.meta["quantization_annotation"]._annotated
+
+    def _mark_input_node_as_annotated(self, node: fx.Node) -> None:
+        """Flag `node` as annotated, creating an empty annotation if it has none."""
+        ann = node.meta.setdefault("quantization_annotation", QuantizationAnnotation())
+        ann._annotated = True
+
+    def _annotate_inputs(self, model: fx.GraphModule):
+        """Annotate every used, not yet annotated placeholder with `act_qspec`."""
+        for candidate in model.graph.nodes:
+            is_used_input = candidate.op == "placeholder" and len(candidate.users) > 0
+            if self._is_input_annotated(candidate) or not is_used_input:
+                continue
+            _annotate_output_qspec(candidate, act_qspec)
+            self._mark_input_node_as_annotated(candidate)
+
     def validate(self, model: torch.fx.GraphModule) -> None:
         return super().validate(model)
diff --git a/backends/nxp/tests/test_quantizer.py b/backends/nxp/tests/test_quantizer.py
@@ -195,8 +195,8 @@ def test_quantizer_single_maxpool2d():
     m(*example_input)
 
     nodes = list(m.graph.nodes)
-    assert len(nodes) == 3
-    assert nodes[1].name == "max_pool2d"
+    assert len(nodes) == 7
+    assert nodes[3].name == "max_pool2d"
     assert "quantization_annotation" not in nodes[1].meta