Make the requant pass call the per_tensor overload

mcremon-meta · web-flow · commit aae0dba853b1 · 2025-06-19T10:06:45.000-07:00
Differential Revision: D74216340 Pull Request resolved: #11789
diff --git a/backends/cadence/aot/fuse_ops.py b/backends/cadence/aot/fuse_ops.py
@@ -712,32 +712,14 @@ def _create_requantize_node(
         out_dtype: torch.dtype,
         graph: torch.fx.Graph,
     ) -> torch.fx.Node:
-        in_scale_tensor = graph.call_function(
-            exir_ops.edge.aten.full.default, args=((1,), in_scale)
-        )
-        in_zero_point_tensor = graph.call_function(
-            exir_ops.edge.aten.full.default,
-            args=((1,), in_zero_point),
-            kwargs={"dtype": torch.int32},
-        )
-        out_scale_tensor = graph.call_function(
-            exir_ops.edge.aten.full.default, args=((1,), out_scale)
-        )
-        out_zero_point_tensor = graph.call_function(
-            exir_ops.edge.aten.full.default,
-            args=((1,), out_zero_point),
-            kwargs={"dtype": torch.int32},
-        )
-        # cadence::requantize(Tensor input, Tensor in_scale, Tensor in_zero_point, Tensor out_scale, Tensor out_zero_point, ScalarType out_dtype) -> Tensor Y
-        # TODO(hardiksharma): Add support for per-tensor requantize.
         return graph.call_function(
-            exir_ops.edge.cadence.requantize.default,
+            exir_ops.edge.cadence.requantize.per_tensor,
             args=(
                 in_tensor,
-                in_scale_tensor,
-                in_zero_point_tensor,
-                out_scale_tensor,
-                out_zero_point_tensor,
+                in_scale,
+                in_zero_point,
+                out_scale,
+                out_zero_point,
                 out_dtype,
             ),
         )
diff --git a/backends/cadence/aot/remove_ops.py b/backends/cadence/aot/remove_ops.py
@@ -447,7 +447,7 @@ def call_operator(
         kwargs: dict[str, Argument],
         meta: NodeMetadata,
     ) -> ProxyValue:
-        if op != exir_ops.edge.cadence.requantize.default:
+        if op != exir_ops.edge.cadence.requantize.per_tensor:
             return super().call_operator(op, args, kwargs, meta)
 
         # Parse the args
diff --git a/backends/cadence/aot/tests/test_fusion_ops_passes.py b/backends/cadence/aot/tests/test_fusion_ops_passes.py
@@ -306,7 +306,7 @@ def test_force_quant_dequant_fusion(self) -> None:
                 # Verify that dequant/quant pair was replaced with requantize.
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 0,
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0,
-                exir_ops.edge.cadence.requantize.default: 1,
+                exir_ops.edge.cadence.requantize.per_tensor: 1,
             },
         )
 
@@ -336,7 +336,7 @@ def test_no_replace_quant_permute_dequant_with_requantize(self) -> None:
                 # quantize -> permute -> dequantize should not be replaced with requantize.
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 1,
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 1,
-                exir_ops.edge.cadence.requantize.default: 0,
+                exir_ops.edge.cadence.requantize.per_tensor: 0,
             },
         )
 
@@ -364,7 +364,7 @@ def test_replace_quant_view_dequant_with_requantize(self) -> None:
                 # Verify that dequant/quant pair was replaced with requantize.
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 0,
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0,
-                exir_ops.edge.cadence.requantize.default: 1,
+                exir_ops.edge.cadence.requantize.per_tensor: 1,
             },
         )
 
@@ -390,7 +390,7 @@ def test_replace_dequant_quant_with_requantize(self) -> None:
                 # Verify that dequant -> quant was replaced with requantize.
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 0,
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0,
-                exir_ops.edge.cadence.requantize.default: 1,
+                exir_ops.edge.cadence.requantize.per_tensor: 1,
             },
         )
 
@@ -420,7 +420,7 @@ def test_replace_dequant_permute_quant_with_requantize(self) -> None:
                 exir_ops.edge.quantized_decomposed.quantize_per_tensor.default: 0,
                 exir_ops.edge.quantized_decomposed.dequantize_per_tensor.default: 0,
                 exir_ops.edge.aten.permute_copy.default: 1,
-                exir_ops.edge.cadence.requantize.default: 1,
+                exir_ops.edge.cadence.requantize.per_tensor: 1,
             },
         )
 
diff --git a/backends/cadence/aot/tests/test_reorder_ops_passes.py b/backends/cadence/aot/tests/test_reorder_ops_passes.py
@@ -217,7 +217,7 @@ def test_advance_branched_quantize(self) -> None:
         self.assertEqual(
             count_node(
                 graph_module,
-                exir_ops.edge.cadence.requantize.default,
+                exir_ops.edge.cadence.requantize.per_tensor,
             ),
             1,
         )

Original file line number	Diff line number	Diff line change
`@@ -217,7 +217,7 @@ def test_advance_branched_quantize(self) -> None:`
`217`	`217`	`self.assertEqual(`
`218`	`218`	`count_node(`
`219`	`219`	`graph_module,`
`220`		`- exir_ops.edge.cadence.requantize.default,`
	`220`	`+ exir_ops.edge.cadence.requantize.per_tensor,`
`221`	`221`	`),`
`222`	`222`	`1,`
`223`	`223`	`)`