Commit 59ae6d1

Andrew Grebenisan authored and facebook-github-bot committed
Refactor quantizer: Only replace with per-tensor variants (pytorch#14974)
Summary: In the previous flow, we would replace ops with their default variants, run a special fusion pass that constructed singleton tensors for a variety of fused quantized ops, and then call a replace-ops pass to turn those into per-tensor variants. I confirmed this was for legacy reasons, so a cleanup was long overdue. This diff replaces ops with the per-tensor variants directly and removes the pass that swapped singleton tensors for scalars.

Reviewed By: hsharma35, zonglinpeng

Differential Revision: D83873738
1 parent 9560800 commit 59ae6d1
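
The shape of the change, as a minimal sketch: previously each scale/zero-point scalar was materialized as a [1]-shaped tensor via aten.full so the default op variant could consume it, and a later pass rewrote the op to its per-tensor form; now the scalar is passed straight to the .per_tensor overload. The helper names below are illustrative, but the call_function pattern and argument positions are taken from the diff that follows.

import torch
import torch.fx as fx

# Before: materialize the dequantize scale as a singleton tensor node in the
# graph; a follow-up replace-ops pass then had to convert the fused op to its
# per-tensor variant.
def old_style_scale_arg(graph_module: fx.GraphModule, dequant: fx.Node) -> fx.Node:
    return graph_module.graph.call_function(
        torch.ops.aten.full.default,
        ([1], dequant.args[1]),  # args[1] of a dequantize node is its scale
        {"dtype": torch.float32},
    )

# After: pass the Python scalar through unchanged and target the *.per_tensor
# overload directly, so no follow-up replacement pass is needed.
def new_style_scale_arg(dequant: fx.Node):
    return dequant.args[1]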

File tree

5 files changed, +138 -467 lines changed


backends/cadence/aot/TARGETS

Lines changed: 1 addition & 0 deletions
@@ -425,6 +425,7 @@ python_unittest(
         "//executorch/exir:pass_base",
         "//executorch/exir/dialects:lib",
         "//executorch/exir/passes:lib",
+        ":ref_implementations",
     ],
 )

backends/cadence/aot/quantizer/fusion_pass.py

Lines changed: 30 additions & 147 deletions
@@ -66,33 +66,18 @@ def get_args_and_kwargs_add(
     dequants_inputs: List[fx.Node],
     quant_node: fx.Node,
 ) -> Tuple[Tuple[ArgsType, ...], Dict[str, ArgsType]]:
-    X_scale_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[0].args[1]),
-        {"dtype": torch.float},
-    )
-    X_zero_point_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[0].args[2]),
-        {"dtype": torch.int32},
-    )
-    Y_scale_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[1].args[1]),
-        {"dtype": torch.float},
-    )
-    Y_zero_point_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[1].args[2]),
-        {"dtype": torch.int32},
-    )
+    X_scale = dequants_inputs[0].args[1]
+
+    X_zero_point = dequants_inputs[0].args[2]
+    Y_scale = dequants_inputs[1].args[1]
+    Y_zero_point = dequants_inputs[1].args[2]
     args = (
         inputs_inputs[0],
-        X_scale_,
-        X_zero_point_,
+        X_scale,
+        X_zero_point,
         inputs_inputs[1],
-        Y_scale_,
-        Y_zero_point_,
+        Y_scale,
+        Y_zero_point,
         quant_node.args[1],
         quant_node.args[2],
     )
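
With the per-tensor overload, the scales and zero points above are plain Python scalars rather than graph nodes. For reference, a scalar sketch of what a per-tensor quantized add is conventionally expected to compute (standard dequantize-add-requantize semantics; the actual Cadence kernel is the authority on rounding and saturation):

def quantized_add_ref(
    x: int, X_scale: float, X_zero_point: int,
    y: int, Y_scale: float, Y_zero_point: int,
    out_scale: float, out_zero_point: int,
) -> int:
    # Dequantize both operands, add in float, then requantize to the output
    # parameters. In the args above, quant_node.args[1] and quant_node.args[2]
    # carry out_scale and out_zero_point.
    acc = X_scale * (x - X_zero_point) + Y_scale * (y - Y_zero_point)
    return round(acc / out_scale) + out_zero_point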
@@ -130,31 +115,12 @@ def get_args_and_kwargs_linear(
     else:
         bias = bias_inputs[0]

-    # Create single element tensors for weight_zero_point, out_multiplier, out_shift.
-    # Note that the function expects int32_t, when it would default to int64_t, so
-    # we explicitly require that type.
-    weight_zero_point_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_weights[0].args[2]),
-        {"dtype": torch.int32},
-    )
-    out_multiplier_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_multiplier[0].item()),
-        {"dtype": torch.int32},
-    )
-    out_shift_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_shift[0].item()),
-        {"dtype": torch.int32},
-    )
-
     args = tuple(inputs_inputs + weights_inputs + [bias])
     kwargs = {
         "src_zero_point": dequants_inputs[0].args[2],
-        "weight_zero_point": weight_zero_point_,
-        "out_multiplier": out_multiplier_,
-        "out_shift": out_shift_,
+        "weight_zero_point": dequants_weights[0].args[2],
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
         "out_zero_point": quant_node.args[2],
         "offset": None,
     }
@@ -179,22 +145,8 @@ def get_args_and_kwargs_layer_norm(
     ), "per-channel quantization is not supported for layer norm, both scale and zero_point should be scalars"

     # Make the scale and zero_point tensors
-    scale_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            dequants_inputs[0].args[1],
-        ),
-        {"dtype": torch.float32},
-    )
-    zero_point_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            dequants_inputs[0].args[2],
-        ),
-        {"dtype": torch.int32},
-    )
+    scale = dequants_inputs[0].args[1]
+    zero_point = dequants_inputs[0].args[2]

     weight = other_inputs[1] if len(other_inputs) > 1 else None

@@ -221,7 +173,7 @@ def get_args_and_kwargs_layer_norm(
     )

     # Make the args and kwargs for the replacement op
-    args = tuple(inputs_inputs + [scale_tensor] + [zero_point_tensor])
+    args = tuple(inputs_inputs + [scale, zero_point])
     kwargs = {
         "normalized_shape": other_inputs[0],
         "weight": weight,
@@ -309,31 +261,6 @@ def get_args_and_kwargs_conv(

     (out_multiplier, out_shift) = quantize_tensor_multiplier(requantize_scale_t)

-    out_multiplier_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_multiplier[0].item()),
-        {"dtype": torch.int32},
-    )
-    out_shift_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_shift[0].item()),
-        {"dtype": torch.int32},
-    )
-
-    # Create a single element tensor for the weight zero point
-    weight_zero_point_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], weight_zero_point),
-        {"dtype": torch.int32},
-    )
-
-    # Create a single element tensor for the bias scale
-    bias_scale_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], bias_scale),
-        {"dtype": torch.float32},
-    )
-
     # Make the args and kwargs for the replacement op
     args = tuple(inputs_inputs + weights_inputs + [bias])
     kwargs = {
@@ -342,12 +269,12 @@ def get_args_and_kwargs_conv(
         "dilation": dilation,
         "groups": groups,
         "input_zero_point": dequants_inputs[0].args[2],
-        "weight_zero_point": weight_zero_point_tensor,
-        "bias_scale": bias_scale_tensor,
+        "weight_zero_point": weight_zero_point,
+        "bias_scale": bias_scale,
         "out_scale": quant_node.args[1],
         "out_zero_point": quant_node.args[2],
-        "out_multiplier": out_multiplier_,
-        "out_shift": out_shift_,
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
     }
     return args, kwargs
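
out_multiplier and out_shift come from quantize_tensor_multiplier(requantize_scale_t), which factors each float requantization scale into a fixed-point multiplier plus a power-of-two shift so the kernel can avoid float math; the .item() calls above extract them as scalars for the per-tensor overload. A scalar sketch of the usual gemmlowp-style decomposition (the function name is illustrative; the real helper operates on tensors, and the sign convention for the stored shift is the kernel's):

import math

def quantize_multiplier(scale: float) -> tuple[int, int]:
    # Decompose scale ~= multiplier * 2**(shift - 31), where multiplier is a
    # Q31 fixed-point value in [2**30, 2**31).
    if scale == 0.0:
        return 0, 0
    mantissa, shift = math.frexp(scale)       # scale = mantissa * 2**shift
    multiplier = round(mantissa * (1 << 31))  # mantissa as Q31
    if multiplier == (1 << 31):               # rounding carried mantissa up to 1.0
        multiplier //= 2
        shift += 1
    return multiplier, shift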

@@ -368,27 +295,11 @@ def get_args_and_kwargs_relu(
     # Make the args and kwargs for the replacement op
     args = tuple(inputs_inputs)

-    X_zero_point = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], dequants_inputs[0].args[2]),
-        {"dtype": torch.int32},
-    )
-    out_multiplier_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_multiplier[0].item()),
-        {"dtype": torch.int32},
-    )
-    out_shift_ = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        ([1], out_shift[0].item()),
-        {"dtype": torch.int32},
-    )
-
     kwargs = {
-        "X_zero_point": X_zero_point,
+        "X_zero_point": dequants_inputs[0].args[2],
         "out_zero_point": quant_node.args[2],
-        "out_multiplier": out_multiplier_,
-        "out_shift": out_shift_,
+        "out_multiplier": out_multiplier[0].item(),
+        "out_shift": out_shift[0].item(),
     }
     return args, kwargs

@@ -436,48 +347,20 @@ def get_args_and_kwargs_softmax(
         {"dtype": torch.int32},
     )
     # Make the scale and zero_point tensors
-    in_scale_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            dequants_inputs[0].args[1],
-        ),
-        {"dtype": torch.float32},
-    )
-    in_zero_point_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            dequants_inputs[0].args[2],
-        ),
-        {"dtype": torch.int32},
-    )
-    out_scale_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            quant_node.args[1],
-        ),
-        {"dtype": torch.float32},
-    )
-    out_zero_point_tensor = graph_module.graph.call_function(
-        torch.ops.aten.full.default,
-        (
-            [1],
-            quant_node.args[2],
-        ),
-        {"dtype": torch.int32},
-    )
+    in_scale = dequants_inputs[0].args[1]
+    in_zero_point = dequants_inputs[0].args[2]
+    out_scale = quant_node.args[1]
+    out_zero_point = quant_node.args[2]

     # Make the args and kwargs for the replacement op
     args = (
         inputs_inputs[0],
         mask_tensor,
         op_node.args[1],
-        in_scale_tensor,
-        in_zero_point_tensor,
-        out_scale_tensor,
-        out_zero_point_tensor,
+        in_scale,
+        in_zero_point,
+        out_scale,
+        out_zero_point,
     )
     kwargs = {}

backends/cadence/aot/quantizer/patterns.py

Lines changed: 12 additions & 9 deletions
@@ -112,7 +112,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_linear.default
+        return torch.ops.cadence.quantized_linear.per_tensor


 class AddPattern(QuantizationPattern):
@@ -150,7 +150,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_add.default
+        return torch.ops.cadence.quantized_add.per_tensor


 class BmmPattern(QuantizationPattern):
@@ -174,6 +174,8 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
+        # TODO: T240804887 This is actually a per-tensor variant,
+        # we just need to change the name of the op
         return torch.ops.cadence.quantized_matmul.default


@@ -265,7 +267,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv2d_nchw.per_tensor


 class Conv2dPattern(QuantizationPattern):
@@ -307,7 +309,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv2d_nchw.per_tensor


 class LayerNormPattern(QuantizationPattern):
@@ -345,7 +347,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_layer_norm.default
+        return torch.ops.cadence.quantized_layer_norm.per_tensor


 class LinearPattern(QuantizationPattern):
@@ -387,7 +389,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_linear.default
+        return torch.ops.cadence.quantized_linear.per_tensor


 class MatmulPattern(QuantizationPattern):
@@ -411,6 +413,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
+        # TODO: T240804887 This is actually a per-tensor variant, we just need to change the name of the op
         return torch.ops.cadence.quantized_matmul.default


@@ -437,7 +440,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_relu.default
+        return torch.ops.cadence.quantized_relu.per_tensor


 # Regular relu op
@@ -496,7 +499,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_conv2d_nchw.default
+        return torch.ops.cadence.quantized_conv2d_nchw.per_tensor


 # Conv1d + regular relu op fusion
@@ -544,7 +547,7 @@ def get_anchors(
         )

     def replacement_op(self) -> OpOverload:
-        return torch.ops.cadence.quantized_softmax.default
+        return torch.ops.cadence.quantized_softmax.per_tensor


 class MixedW8A32LinearPattern(QuantizationPattern):
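
For context, a minimal sketch of how a fusion pass consumes replacement_op(): the matched subgraph's terminal node is swapped for a call to the per-tensor overload. The helper below is illustrative, not the actual code in fusion_pass.py, but it uses only standard torch.fx APIs:

import torch.fx as fx
from torch._ops import OpOverload

def rewrite_fused_node(
    graph_module: fx.GraphModule,
    fused_node: fx.Node,
    replacement: OpOverload,  # e.g. torch.ops.cadence.quantized_linear.per_tensor
    args: tuple,
    kwargs: dict,
) -> None:
    # Insert the per-tensor op where the matched pattern ends, reroute all
    # users of the old node to it, then drop the old node and recompile.
    with graph_module.graph.inserting_before(fused_node):
        new_node = graph_module.graph.call_function(replacement, args, kwargs)
    fused_node.replace_all_uses_with(new_node)
    graph_module.graph.erase_node(fused_node)
    graph_module.recompile()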
