@@ -125,6 +125,7 @@ def get_deps(
             # detected precision but it is either disabled or not supported
             why(node, f"Unsupported precision type {precision}")
             return (False, [])
+        _, precision = self._overwrite_precision(node)
         valid_bias, bias_deps = self._get_bias_deps(node, ep, precision)
         valid_weight, weight_deps = self._get_weight_deps(node, ep, precision)
         valid_act, act_deps = self._get_act_deps(node, ep, precision)
@@ -139,11 +140,6 @@ def _get_weight_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
         gemm_deps = []
-        if precision == ConfigPrecisionType.FP32 and self.force_non_static_weights_for_f32_linear:
-            # if force_non_static_weights_for_f32_linear is enabled, then we
-            # do not partition the weight node
-            return (True, gemm_deps)
-
         if precision == ConfigPrecisionType.FP32:
             # First find the weight
             weight_node = get_input_node(node, self.weight_idx)
@@ -225,8 +221,11 @@ def _get_bias_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
         gemm_deps = []
-        if precision == ConfigPrecisionType.FP32 and self.force_non_static_weights_for_f32_linear:
-            # if force for_fp32_linear_as_matmul is enabled, then we
+        if (
+            precision == ConfigPrecisionType.FP32
+            and self.force_non_static_weights_for_f32_linear
+        ):
+            # if force_non_static_weights_for_f32_linear is enabled, then we
             # do not partition the weight node
             return (True, gemm_deps)
 
@@ -304,6 +303,14 @@ def get_original_aten(self) -> Optional[torch._ops.OpOverload]:
     def _get_weight_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
+        if (
+            precision == ConfigPrecisionType.FP32
+            and self.force_non_static_weights_for_f32_linear
+        ):
+            # if force_non_static_weights_for_f32_linear is enabled, then we
+            # do not partition the weight node
+            return (True, [])
+
         # Since we are in Linear, we may assume that the weights are indeed static.
         overwritten_linear_precision, new_precision = self._overwrite_precision(node)
         if new_precision == ConfigPrecisionType.FP32 and overwritten_linear_precision:
@@ -403,6 +410,19 @@ def __init__(self, **kwargs):
         self.src_partitions = None
         self.linear_modules = [torch.nn.functional.linear, torch.nn.Linear]
 
+    def _get_weight_deps(
+        self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
+    ) -> Tuple[bool, List[torch.fx.Node]]:
+        if (
+            precision == ConfigPrecisionType.FP32
+            and self.force_non_static_weights_for_f32_linear
+        ):
+            # if force_non_static_weights_for_f32_linear is on and we detected this as fp32, then we
+            # do not partition the weight node
+            return (True, [])
+
+        return super()._get_weight_deps(node, ep, precision)
+
     def get_deps(
         self,
         node: torch.fx.Node,
@@ -511,6 +531,19 @@ def __init__(self, **kwargs):
         self.weight_idx = 1
         self.act_idx = 0
 
+    def _get_weight_deps(
+        self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
+    ) -> Tuple[bool, List[torch.fx.Node]]:
+        if (
+            precision == ConfigPrecisionType.FP32
+            and self.force_non_static_weights_for_f32_linear
+        ):
+            # if force_non_static_weights_for_f32_linear is on and we detected this as fp32, then we
+            # do not partition the weight node
+            return (True, [])
+
+        return super()._get_weight_deps(node, ep, precision)
+
     def supported_precision_types(self):
         return [
             ConfigPrecisionType.FP32,
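
Usage note: below is a minimal sketch of how the force_non_static_weights_for_f32_linear option threaded through these configs might be exercised. It assumes XnnpackPartitioner forwards its keyword arguments down to the GEMM configs shown above; the export and lowering calls are standard ExecuTorch APIs, not part of this diff, and the model is purely illustrative.

import torch
from executorch.backends.xnnpack.partition.xnnpack_partitioner import XnnpackPartitioner
from executorch.exir import to_edge_transform_and_lower


class TinyLinear(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(32, 16)

    def forward(self, x):
        return self.linear(x)


ep = torch.export.export(TinyLinear().eval(), (torch.randn(1, 32),))

# With the flag enabled (assumed to be forwarded via **kwargs), fp32 linear
# weights return (True, []) from _get_weight_deps, so the weight nodes are not
# pulled into the XNNPACK partition and stay non-static at runtime.
lowered = to_edge_transform_and_lower(
    ep,
    partitioner=[XnnpackPartitioner(force_non_static_weights_for_f32_linear=True)],
)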