@@ -96,9 +96,9 @@ def _detect_precision(self, node: torch.fx.Node) -> ConfigPrecisionType:
     def _overwrite_precision(self, node: torch.fx.Node):
         precision = self._detect_precision(node)
         if precision not in self.enabled_precision_types:
-            # detected precision is not enabled, lets try to partition it as fp32
+            # detected precision is not enabled, try to partition it as fp32
             if self.enabled_precision_types == [ConfigPrecisionType.FP32]:
-                # if only fp32 is enabled, then we can still partition fp32 gemms
+                # when only fp32 is enabled, then we can still partition fp32 gemms
                 # even with in a quantized graph
                 if precision in [
                     ConfigPrecisionType.STATIC_QUANT,
@@ -107,6 +107,7 @@ def _overwrite_precision(self, node: torch.fx.Node):
                     precision = ConfigPrecisionType.FP32
                     logging.info(f"Overwriting precision, partitioning {node} as FP32")
                     return True, precision
+
         return False, precision

     def get_deps(
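To make the two-value return contract above concrete, here is a minimal, self-contained sketch: the first element reports whether the detected precision was overridden, the second is the precision to partition with. ToyGemmConfig and its attribute names are hypothetical stand-ins, not the real XNNPACK config classes.

from enum import Enum
from typing import List, Tuple


class ConfigPrecisionType(Enum):
    FP32 = 0
    STATIC_QUANT = 1
    DYNAMIC_QUANT = 2


class ToyGemmConfig:
    """Hypothetical stand-in that models only the precision-override rule."""

    def __init__(self, enabled_precision_types: List[ConfigPrecisionType]):
        self.enabled_precision_types = enabled_precision_types

    def overwrite_precision(
        self, detected: ConfigPrecisionType
    ) -> Tuple[bool, ConfigPrecisionType]:
        if detected not in self.enabled_precision_types:
            # A quantized gemm can still be claimed as fp32 when fp32 is the
            # only precision the partitioner was configured with.
            if self.enabled_precision_types == [ConfigPrecisionType.FP32] and detected in (
                ConfigPrecisionType.STATIC_QUANT,
                ConfigPrecisionType.DYNAMIC_QUANT,
            ):
                return True, ConfigPrecisionType.FP32
        return False, detected


cfg = ToyGemmConfig([ConfigPrecisionType.FP32])
print(cfg.overwrite_precision(ConfigPrecisionType.DYNAMIC_QUANT))  # overridden to FP32
print(cfg.overwrite_precision(ConfigPrecisionType.FP32))           # left untouched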
@@ -124,7 +125,6 @@ def get_deps(
             # detected precision but it is either disabled or not supported
             why(node, f"Unsupported precision type {precision}")
             return (False, [])
-        _, precision = self._overwrite_precision(node)
         valid_bias, bias_deps = self._get_bias_deps(node, ep, precision)
         valid_weight, weight_deps = self._get_weight_deps(node, ep, precision)
         valid_act, act_deps = self._get_act_deps(node, ep, precision)
@@ -139,6 +139,11 @@ def _get_weight_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
         gemm_deps = []
+        if precision == ConfigPrecisionType.FP32 and self.force_non_static_weights_for_f32_linear:
+            # if force_non_static_weights_for_f32_linear is enabled, then we
+            # do not partition the weight node
+            return (True, gemm_deps)
+
         if precision == ConfigPrecisionType.FP32:
             # First find the weight
             weight_node = get_input_node(node, self.weight_idx)
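A small sketch of the early return added above, under the assumption that the flag simply suppresses weight dependencies for fp32 gemms so the weight is free to be non-static. weight_deps_sketch and its parameters are hypothetical names, not the real _get_weight_deps signature.

from typing import Any, List, Tuple


def weight_deps_sketch(
    precision_is_fp32: bool,
    force_non_static_weights_for_f32_linear: bool,
    weight_node: Any,
) -> Tuple[bool, List[Any]]:
    # Paraphrase of the early return: when the flag is set for an fp32 gemm,
    # the weight is not required to be a static parameter, so no weight
    # dependency is collected for the partition.
    if precision_is_fp32 and force_non_static_weights_for_f32_linear:
        return True, []
    # Otherwise the weight node itself becomes a dependency (the static-param
    # checks of the real implementation are elided here).
    return True, [weight_node]


print(weight_deps_sketch(True, True, "fake_weight_node"))   # (True, [])
print(weight_deps_sketch(True, False, "fake_weight_node"))  # (True, ['fake_weight_node'])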
@@ -220,8 +225,8 @@ def _get_bias_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
         gemm_deps = []
-        if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear:
-            # if force force_fp32_dynamic_linear is enabled, then we
+        if precision == ConfigPrecisionType.FP32 and self.force_non_static_weights_for_f32_linear:
+            # if force_non_static_weights_for_f32_linear is enabled, then we
             # do not partition the weight node
             return (True, gemm_deps)

@@ -299,11 +304,6 @@ def get_original_aten(self) -> Optional[torch._ops.OpOverload]:
     def _get_weight_deps(
         self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
     ) -> Tuple[bool, List[torch.fx.Node]]:
-        if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear:
-            # if force fp32_dynamic_linear is enabled, then we
-            # do not partition the weight node
-            return (True, [])
-
         # Since we are in Linear, we may assume that the weights are indeed static.
         overwritten_linear_precision, new_precision = self._overwrite_precision(node)
         if new_precision == ConfigPrecisionType.FP32 and overwritten_linear_precision:
@@ -403,17 +403,6 @@ def __init__(self, **kwargs):
         self.src_partitions = None
         self.linear_modules = [torch.nn.functional.linear, torch.nn.Linear]

-    def _get_weight_deps(
-        self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
-    ) -> Tuple[bool, List[torch.fx.Node]]:
-        # TODO(maxren, T210537195):
-        if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear:
-            # if force fp32_dynamic_linear is on and we detected this as fp32, then we
-            # do not partition the weight node
-            return (True, [])
-
-        return super()._get_weight_deps(node, ep, precision)
-
     def get_deps(
         self,
         node: torch.fx.Node,
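The overrides deleted above duplicated the flag check that now lives in the base _get_weight_deps, so the subclasses simply inherit it. A rough sketch of the resulting shape, with hypothetical BaseGemmSketch/LinearSketch classes standing in for the real config hierarchy:

from typing import Any, List, Tuple


class BaseGemmSketch:
    """Hypothetical base: the flag check now lives in exactly one place."""

    def __init__(self, force_non_static_weights_for_f32_linear: bool = False):
        self.force_non_static_weights_for_f32_linear = force_non_static_weights_for_f32_linear

    def get_weight_deps(self, is_fp32: bool, weight_node: Any) -> Tuple[bool, List[Any]]:
        if is_fp32 and self.force_non_static_weights_for_f32_linear:
            return True, []
        return True, [weight_node]


class LinearSketch(BaseGemmSketch):
    # No per-subclass override any more; the behavior removed from the
    # Linear/addmm configs in this diff is inherited from the base class.
    pass


print(LinearSketch(force_non_static_weights_for_f32_linear=True).get_weight_deps(True, "w"))
# (True, [])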
@@ -495,11 +484,11 @@ def find_partition_args(input_node):
         node.args = old_args
         node.users = old_users

-        # When using force_fp32_dynamic_linear, we want to get_deps to overwrite the source partition nodes.
+        # When using force_non_static_weights_for_f32_linear, we want get_deps to overwrite the source partition nodes.
         # Else we want to be greedy.
         ret_deps = (
             list(set(deps) & set(src_partition.nodes))
-            if self.force_fp32_dynamic_linear
+            if self.force_non_static_weights_for_f32_linear
             else list(set(deps) | set(src_partition.nodes))
         )

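A toy illustration of the intersection-versus-union choice above: with the flag set, only nodes that survived get_deps are kept; otherwise everything in the matched source partition is claimed as well. The node names are made up for the example.

deps = {"bias", "input_activation"}  # the weight was dropped by the flag
src_partition_nodes = {"weight", "bias", "linear_op", "view_copy"}

force_non_static_weights_for_f32_linear = True

ret_deps = (
    # Let get_deps overwrite the source partition: keep only the nodes that
    # survived the dependency checks.
    sorted(deps & src_partition_nodes)
    if force_non_static_weights_for_f32_linear
    # Greedy default: claim everything from both sets.
    else sorted(deps | src_partition_nodes)
)
print(ret_deps)  # ['bias'] when the flag is set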
@@ -522,16 +511,6 @@ def __init__(self, **kwargs):
         self.weight_idx = 1
         self.act_idx = 0

-    def _get_weight_deps(
-        self, node: torch.fx.Node, ep: ExportedProgram, precision: ConfigPrecisionType
-    ) -> Tuple[bool, List[torch.fx.Node]]:
-        if precision == ConfigPrecisionType.FP32 and self.force_fp32_dynamic_linear:
-            # if force fp32_dynamic_linear is on and we detected this as fp32, then we
-            # do not partition the weight node
-            return (True, [])
-
-        return super()._get_weight_deps(node, ep, precision)
-
     def supported_precision_types(self):
         return [
             ConfigPrecisionType.FP32,