Replace the default quantized conv and relu variants with their per-tensor variants

Andrew Grebenisan · facebook-github-bot · commit 1e4a8c3cad84 · 2025-10-03T14:48:30.000-07:00
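Summary: Call only the per-tensor variants of quantized conv and quantized relu, since those are the only ones we support. The fusion pass now passes the zero points, bias scale, output multiplier, and output shift directly as scalars instead of wrapping each in a single-element tensor created with `aten.full`.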

Differential Revision: D83873738
diff --git a/backends/cadence/aot/quantizer/fusion_pass.py b/backends/cadence/aot/quantizer/fusion_pass.py
@@ -306,31 +306,6 @@ def get_args_and_kwargs_conv(
 
     (out_multiplier, out_shift) = quantize_tensor_multiplier(requantize_scale_t)
 
-    out_multiplier_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_multiplier[0].item()),
-        {"dtype": torch.int32},
-    )
-    out_shift_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_shift[0].item()),
-        {"dtype": torch.int32},
-    )
-
-    # Create a single element tensor for the weight zero point
-    weight_zero_point_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], weight_zero_point),
-        {"dtype": torch.int32},
-    )
-
-    # Create a single element tensor for the bias scale
-    bias_scale_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], bias_scale),
-        {"dtype": torch.float32},
-    )
-
     # Make the args and kwargs for the replacement op
     args = tuple(inputs_inputs + weights_inputs + [bias])
     kwargs = {
@@ -339,12 +314,12 @@ def get_args_and_kwargs_conv(
         "dilation": dilation,
         "groups": groups,
         "input_zero_point": dequants_inputs[0].args[2],
-        "weight_zero_point": weight_zero_point_tensor,
-        "bias_scale": bias_scale_tensor,
+        "weight_zero_point": weight_zero_point,
+        "bias_scale": bias_scale,
         "out_scale": quant_node.args[1],
         "out_zero_point": quant_node.args[2],
-        "out_multiplier": out_multiplier_,
-        "out_shift": out_shift_,
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
     }
     return args, kwargs
 
@@ -365,27 +340,11 @@ def get_args_and_kwargs_relu(
     # Make the args and kwargs for the replacement op
     args = tuple(inputs_inputs)
 
-    X_zero_point = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[0].args[2]),
-        {"dtype": torch.int32},
-    )
-    out_multiplier_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_multiplier[0].item()),
-        {"dtype": torch.int32},
-    )
-    out_shift_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_shift[0].item()),
-        {"dtype": torch.int32},
-    )
-
     kwargs = {
-        "X_zero_point": X_zero_point,
+        "X_zero_point": dequants_inputs[0].args[2],
         "out_zero_point": quant_node.args[2],
-        "out_multiplier": out_multiplier_,
-        "out_shift": out_shift_,
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
     }
     return args, kwargs
 
diff --git a/backends/cadence/aot/quantizer/patterns.py b/backends/cadence/aot/quantizer/patterns.py
@@ -265,7 +265,7 @@ def get_anchors(
         )
 
     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv2d_nchw.per_tensor
 
 
 class Conv2dPattern(QuantizationPattern):
@@ -307,7 +307,7 @@ def get_anchors(
         )
 
     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv2d_nchw.per_tensor
 
 
 class LayerNormPattern(QuantizationPattern):
@@ -437,7 +437,7 @@ def get_anchors(
         )
 
     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_relu.default
+        return torch.ops.cadence.quantized_relu.per_tensor
 
 
 # Regular relu op
@@ -496,7 +496,7 @@ def get_anchors(
         )
 
     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv2d_nchw.per_tensor
 
 
 # Conv1d + regular relu op fusion

Original file line number	Diff line number	Diff line change
`@@ -265,7 +265,7 @@ def get_anchors(`
`265`	`265`	`)`
`266`	`266`
`267`	`267`	`def replacement_op(self) -> OpOverload:`
`268`		`- return torch.ops.cadence.quantized_conv2d_nchw.default`
	`268`	`+ return torch.ops.cadence.quantized_conv2d_nchw.per_tensor`
`269`	`269`
`270`	`270`
`271`	`271`	`class Conv2dPattern(QuantizationPattern):`
`@@ -307,7 +307,7 @@ def get_anchors(`
`307`	`307`	`)`
`308`	`308`
`309`	`309`	`def replacement_op(self) -> OpOverload:`
`310`		`- return torch.ops.cadence.quantized_conv2d_nchw.default`
	`310`	`+ return torch.ops.cadence.quantized_conv2d_nchw.per_tensor`
`311`	`311`
`312`	`312`
`313`	`313`	`class LayerNormPattern(QuantizationPattern):`
`@@ -437,7 +437,7 @@ def get_anchors(`
`437`	`437`	`)`
`438`	`438`
`439`	`439`	`def replacement_op(self) -> OpOverload:`
`440`		`- return torch.ops.cadence.quantized_relu.default`
	`440`	`+ return torch.ops.cadence.quantized_relu.per_tensor`
`441`	`441`
`442`	`442`
`443`	`443`	`# Regular relu op`
`@@ -496,7 +496,7 @@ def get_anchors(`
`496`	`496`	`)`
`497`	`497`
`498`	`498`	`def replacement_op(self) -> OpOverload:`
`499`		`- return torch.ops.cadence.quantized_conv2d_nchw.default`
	`499`	`+ return torch.ops.cadence.quantized_conv2d_nchw.per_tensor`
`500`	`500`
`501`	`501`
`502`	`502`	`# Conv1d + regular relu op fusion`