Commit 9325d08

More test coverage, cleanup
1 parent 9174ee0 commit 9325d08

6 files changed: +203 -20 lines changed

backends/test/harness/tester.py

Lines changed: 5 additions & 1 deletion
@@ -41,8 +41,12 @@ def __init__(
         example_inputs: Tuple[torch.Tensor],
         stage_classes: Dict[StageType, Callable] | None = None,
         dynamic_shapes: Optional[Tuple[Any]] = None,
+        training: bool = False,
     ):
-        module.eval()
+        if training:
+            module.train()
+        else:
+            module.eval()

         self.stage_classes = stage_classes or Tester.default_stage_classes()
         self.original_module = module
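
The new training flag controls whether the harness keeps the module in train mode or switches it to eval before the stages run. A minimal usage sketch (the module and inputs here are placeholders, not part of the commit):

    # Keeping the module in train mode means ops such as batch norm export
    # their training variants rather than the inference-only ones.
    tester = Tester(MyModule(), example_inputs, training=True)
    tester.export().to_edge_transform_and_lower()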

backends/xnnpack/_passes/decompose_batch_norm.py

Lines changed: 28 additions & 6 deletions
@@ -72,9 +72,14 @@ def can_decompose_batch_norm(
                 why(node, f"Channel dimension must be statically known, but was {input_meta.shape[1]}.")
             return False

-        if not is_param_node(exported_program, node.args[1]) or not is_param_node(exported_program, node.args[2]):
+        if node.args[1] is not None and not is_param_node(exported_program, node.args[1]):
             if why:
-                why(node, "Batch norm affine weight and bias must be static.")
+                why(node, "Batch norm affine weight must be static.")
+            return False
+
+        if node.args[2] is not None and not is_param_node(exported_program, node.args[2]):
+            if why:
+                why(node, "Batch norm affine bias must be static.")
             return False

         if not is_param_node(exported_program, node.args[3]) or not is_param_node(exported_program, node.args[4]):
@@ -87,6 +92,11 @@ def can_decompose_batch_norm(
                 why(node, "Batch norm epsilon must be static.")
             return False

+        if node.target == exir_ops.edge.aten.native_batch_norm.default and node.args[5] is not False:
+            if why:
+                why(node, "Training batch norm is not supported.")
+            return False
+
         return True

     @staticmethod
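
For reference, the positional indices checked above follow the ATen batch norm signature. A small standalone sketch of that argument order (generic ATen usage, not code from the commit):

    import torch

    # aten.native_batch_norm takes
    #   (input, weight, bias, running_mean, running_var, training, momentum, eps),
    # so args[1]/args[2] are the affine weight and bias (None when affine=False),
    # args[3]/args[4] are the running stats, and args[5] is the training flag
    # that the new check requires to be False.
    x = torch.randn(2, 3, 4, 4)
    out, save_mean, save_invstd = torch.ops.aten.native_batch_norm(
        x, None, None, torch.zeros(3), torch.ones(3), False, 0.1, 1e-5
    )
    print(out.shape)  # torch.Size([2, 3, 4, 4])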
@@ -103,11 +113,23 @@ def compute_w_and_b(
         """

         # See https://docs.pytorch.org/docs/stable/generated/torch.nn.BatchNorm1d.html
-        denom = torch.sqrt(running_var + torch.Tensor([eps]))
-        weight = gamma / denom
-        bias = -running_mean * gamma / denom + beta

-        return weight, bias
+        # Do the math in double precision and convert back to the original dtype at the
+        # end. ATen kernels do this math in increased precision for float16. Note that
+        # all of the parameter dtypes must match, as per the ATen behavior.
+
+        # Also note that gamma and beta can be None if affine=False. This is equivalent
+        # to gamma = 1 and beta = 0.
+        gamma_f64 = gamma.double() if gamma is not None else torch.Tensor([1]).double()
+        beta_f64 = beta.double() if beta is not None else torch.Tensor([0]).double()
+        running_mean_f64 = running_mean.double()
+        running_var_f64 = running_var.double()
+
+        denom = torch.sqrt(running_var_f64 + torch.Tensor([eps]))
+        new_weight = gamma_f64 / denom
+        new_bias = -running_mean_f64 * gamma_f64 / denom + beta_f64
+
+        return new_weight.to(running_mean.dtype), new_bias.to(running_mean.dtype)

     def replace_bn_node_with_conv(
         self,
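
The folding here is the standard eval-mode identity: batch norm computes y = gamma * (x - mean) / sqrt(var + eps) + beta, which is an affine transform y = w * x + b with w = gamma / sqrt(var + eps) and b = beta - mean * w. A standalone sketch (not part of the commit) that checks the folded form against torch's own eval-mode batch norm:

    import torch
    import torch.nn.functional as F

    C, eps = 3, 1e-5
    x = torch.randn(2, C, 4, 4)
    gamma, beta = torch.randn(C), torch.randn(C)
    mean, var = torch.randn(C), torch.rand(C) + 0.5

    # Folded per-channel scale and shift, computed as in compute_w_and_b.
    w = gamma / torch.sqrt(var + eps)
    b = beta - mean * w

    ref = F.batch_norm(x, mean, var, gamma, beta, training=False, eps=eps)
    folded = x * w.view(1, C, 1, 1) + b.view(1, C, 1, 1)
    print(torch.allclose(ref, folded, atol=1e-6))  # expected: True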

backends/xnnpack/partition/config/node_configs.py

Lines changed: 0 additions & 3 deletions
@@ -16,9 +16,6 @@
     XNNPartitionerConfig,
 )
 from executorch.backends.xnnpack.utils.utils import is_param_node
-from executorch.exir.backend.canonical_partitioners.config_partitioner import (
-    format_target_name,
-)
 from executorch.exir.backend.utils import WhyNoPartition
 from torch.export import ExportedProgram

backends/xnnpack/test/ops/test_batch_norm.py

Lines changed: 84 additions & 2 deletions
@@ -50,11 +50,11 @@ def get_inputs(self):
     class BatchNorm2d(torch.nn.Module):
         """BatchNorm2d with NCHW input (batch, channels, height, width)."""

-        def __init__(self, num_features: int, dtype: torch.dtype = torch.float):
+        def __init__(self, num_features: int, dtype: torch.dtype = torch.float, affine: bool = True):
             super().__init__()
             self.num_features = num_features
             self.dtype = dtype
-            self.bn = torch.nn.BatchNorm2d(num_features).to(dtype)
+            self.bn = torch.nn.BatchNorm2d(num_features, affine=affine).to(dtype)

         def forward(self, x):
             return self.bn(x)
@@ -154,6 +154,28 @@ def test_fp16_batch_norm_nchw(self):
         """Test BatchNorm2d with fp16 NCHW input is lowered to XNNPACK."""
         self._test_batch_norm(self.BatchNorm2d(num_features=3, dtype=torch.float16))

+    def test_fp32_batch_norm_nchw_non_affine(self):
+        """Test non-affine BatchNorm2d with NCHW input is lowered to XNNPACK."""
+        self._test_batch_norm(self.BatchNorm2d(num_features=3, affine=False))
+
+    class BatchNorm2dChannelsLast(torch.nn.Module):
+        """BatchNorm2d with channels_last memory format input."""
+
+        def __init__(self, num_features: int):
+            super().__init__()
+            self.num_features = num_features
+            self.bn = torch.nn.BatchNorm2d(num_features)
+
+        def forward(self, x):
+            return self.bn(x)
+
+        def get_inputs(self):
+            return (torch.randn(2, self.num_features, 4, 4).to(memory_format=torch.channels_last),)
+
+    def test_fp32_batch_norm_nchw_channels_last(self):
+        """Test BatchNorm2d with channels_last memory format input is lowered to XNNPACK."""
+        self._test_batch_norm(self.BatchNorm2dChannelsLast(num_features=3))
+
     class BatchNorm3d(torch.nn.Module):
         """BatchNorm3d with NCDHW input (batch, channels, depth, height, width)."""

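
For readers unfamiliar with channels_last, a short standalone sketch (mine, not part of the commit) of how these tests build their inputs and how the memory format can be verified:

    import torch

    # NCHW-shaped tensor whose strides follow the channels_last layout,
    # matching what BatchNorm2dChannelsLast.get_inputs() returns.
    x = torch.randn(2, 3, 4, 4).to(memory_format=torch.channels_last)
    print(x.is_contiguous(memory_format=torch.channels_last))  # True

    bn = torch.nn.BatchNorm2d(3).eval()
    print(bn(x).shape)  # torch.Size([2, 3, 4, 4])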

@@ -277,3 +299,63 @@ def test_fp32_conv2d_batch_norm_fused(self):
             .serialize()
             .run_method_and_compare_outputs()
         )
+
+    class Conv2dBatchNormChannelsLast(torch.nn.Module):
+        """Conv2d followed by BatchNorm (fuseable pattern) with channels_last input."""
+
+        def __init__(self, in_channels: int, out_channels: int):
+            super().__init__()
+            self.in_channels = in_channels
+            self.conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
+            self.bn = randomize_bn(out_channels)
+
+        def forward(self, x):
+            x = self.conv(x)
+            x = self.bn(x)
+            return x
+
+        def get_inputs(self):
+            return (torch.randn(2, self.in_channels, 8, 8).to(memory_format=torch.channels_last),)
+
+    def test_fp32_conv2d_batch_norm_fused_channels_last(self):
+        """
+        Test Conv2d + BatchNorm with channels_last input where the BatchNorm is
+        fused into the Conv2d.
+        """
+        model = self.Conv2dBatchNormChannelsLast(in_channels=3, out_channels=8)
+        model.eval()
+
+        (
+            Tester(model, model.get_inputs())
+            .export()
+            .to_edge_transform_and_lower()
+            # BatchNorm should be fused into conv (not present in the graph)
+            .check_not(
+                [
+                    "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_no_training_default"
+                ]
+            )
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 1})
+            .to_executorch()
+            .serialize()
+            .run_method_and_compare_outputs()
+        )
+
+    def test_training_bn_not_partitioned(self):
+        """Test that training mode BatchNorm is not partitioned."""
+        model = self.BatchNorm2d(num_features=3)
+        for _ in range(5):
+            model(*model.get_inputs())
+
+        (
+            Tester(model, model.get_inputs(), training=True)
+            .export()
+            .to_edge_transform_and_lower()
+            .check(
+                [
+                    "executorch_exir_dialects_edge__ops_aten__native_batch_norm_legit_functional"
+                ]
+            )
+            .check_count({"torch.ops.higher_order.executorch_call_delegate": 0})
+            .run_method_and_compare_outputs()
+        )
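
The warm-up loop in test_training_bn_not_partitioned exercises the module in train mode, which updates the batch norm running statistics before export. A standalone illustration of that effect (not from the commit):

    import torch

    bn = torch.nn.BatchNorm2d(3)  # modules are in train mode by default
    before = bn.running_mean.clone()
    for _ in range(5):
        bn(torch.randn(2, 3, 4, 4) + 1.0)
    print(torch.allclose(before, bn.running_mean))  # False: stats were updated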

backends/xnnpack/test/passes/test_decompose_batch_norm.py

Lines changed: 84 additions & 8 deletions
@@ -9,6 +9,8 @@
 import torch
 from executorch.backends.xnnpack._passes.decompose_batch_norm import DecomposeBatchNorm
 from executorch.backends.xnnpack.test.tester import RunPasses, Tester
+from executorch.exir import EdgeProgramManager
+from executorch.exir.dialects._ops import ops as exir_ops


 class TestDecomposeBatchNorm(unittest.TestCase):
@@ -46,9 +48,9 @@ def forward(self, x):
     class BatchNorm2d(torch.nn.Module):
         """Simple BatchNorm2d module with NCHW input."""

-        def __init__(self, num_features: int):
+        def __init__(self, num_features: int, affine: bool = True):
             super().__init__()
-            self.bn = torch.nn.BatchNorm2d(num_features)
+            self.bn = torch.nn.BatchNorm2d(num_features, affine=affine)
             # Run a forward pass to update the BN running stats.
             self.forward(torch.randn(2, num_features, 4, 4) * 2 + 2)

@@ -57,9 +59,10 @@ def forward(self, x):

     def test_fp32_batch_norm_nc(self):
         """Test that BatchNorm1d with NC input is decomposed to convolution."""
-        (
+        model = self.BatchNorm1dNC(3).eval()
+        tester = (
             Tester(
-                self.BatchNorm1dNC(3).eval(),
+                model,
                 (torch.randn(2, 3),),
             )
             .export()
@@ -70,12 +73,14 @@ def test_fp32_batch_norm_nc(self):
             .check_not([self.bn_name])
             .run_method_and_compare_outputs()
         )
+        self._validate_decomposition(tester.get_artifact(), torch.float32, 3, 1)

     def test_fp32_batch_norm_ncl(self):
         """Test that BatchNorm1d with NCL input is decomposed to convolution."""
-        (
+        model = self.BatchNorm1dNCL(3).eval()
+        tester = (
             Tester(
-                self.BatchNorm1dNCL(3).eval(),
+                model,
                 (torch.randn(2, 3, 4),),
             )
             .export()
@@ -86,12 +91,50 @@ def test_fp32_batch_norm_ncl(self):
             .check_not([self.bn_name])
             .run_method_and_compare_outputs()
         )
+        self._validate_decomposition(tester.get_artifact(), torch.float32, 3, 1)

     def test_fp32_batch_norm_nchw(self):
         """Test that BatchNorm2d with NCHW input is decomposed to convolution."""
-        (
+        model = self.BatchNorm2d(3).eval()
+        tester = (
+            Tester(
+                model,
+                (torch.randn(2, 3, 4, 4),),
+            )
+            .export()
+            .to_edge()
+            .check_count({self.bn_name: 1})
+            .run_passes(self.PassStage)
+            .check_count({self.conv_name: 1})
+            .check_not([self.bn_name])
+            .run_method_and_compare_outputs()
+        )
+        self._validate_decomposition(tester.get_artifact(), torch.float32, 3, 2)
+
+    def test_fp16_batch_norm_nchw(self):
+        """Test that fp16 BatchNorm2d with NCHW input is decomposed to convolution."""
+        model = self.BatchNorm2d(3).to(torch.float16).eval()
+        tester = (
             Tester(
-                self.BatchNorm2d(3).eval(),
+                model,
+                (torch.randn(2, 3, 4, 4, dtype=torch.float16),),
+            )
+            .export()
+            .to_edge()
+            .check_count({self.bn_name: 1})
+            .run_passes(self.PassStage)
+            .check_count({self.conv_name: 1})
+            .check_not([self.bn_name])
+            .run_method_and_compare_outputs()
+        )
+        self._validate_decomposition(tester.get_artifact(), torch.float16, 3, 2)
+
+    def test_fp32_batch_norm_nchw_non_affine(self):
+        """Test that non-affine BatchNorm2d with NCHW input is decomposed to convolution."""
+        model = self.BatchNorm2d(3, affine=False).eval()
+        tester = (
+            Tester(
+                model,
                 (torch.randn(2, 3, 4, 4),),
             )
             .export()
@@ -102,3 +145,36 @@ def test_fp32_batch_norm_nchw(self):
             .check_not([self.bn_name])
             .run_method_and_compare_outputs()
         )
+        self._validate_decomposition(tester.get_artifact(), torch.float32, 3, 2)
+
+    def _validate_decomposition(self, edge_manager: EdgeProgramManager, dtype: torch.dtype, num_channels: int, spatial_dims: int):
+        # Verify that the graph contains a 1x1 depthwise convolution and that
+        # the transformed parameter dtypes match the original.
+
+        conv_node = next(
+            n
+            for n in edge_manager.exported_program().graph.nodes
+            if n.target == exir_ops.edge.aten.convolution.default
+        )
+        self.assertEqual(conv_node.meta["val"].dtype, dtype)
+
+        self.assertEqual(len(conv_node.args), 9)
+        _, w_node, b_node, stride, padding, dilation, transposed, output_padding, groups = conv_node.args
+
+        # Check the convolution parameters. It should be a 1x1 depthwise convolution.
+        self.assertEqual(stride, [1] * spatial_dims)
+        self.assertEqual(padding, [0] * spatial_dims)
+        self.assertEqual(dilation, [1] * spatial_dims)
+        self.assertEqual(transposed, False)
+        self.assertEqual(output_padding, [0] * spatial_dims)
+        self.assertEqual(groups, num_channels)
+
+        w_meta = w_node.meta["val"]
+        b_meta = b_node.meta["val"]
+
+        # Weight should be (out_c, in_c/g, kH, [kW])
+        # Bias should be (out_c,)
+        self.assertEqual(w_meta.shape, tuple([num_channels, 1] + [1] * spatial_dims))
+        self.assertEqual(w_meta.dtype, dtype)
+        self.assertEqual(b_meta.shape, (num_channels,))
+        self.assertEqual(b_meta.dtype, dtype)
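
_validate_decomposition expects the pass to have rewritten the batch norm as a per-channel 1x1 depthwise convolution. A standalone sketch (not from the commit) showing that this form matches eval-mode BatchNorm2d numerically:

    import torch
    import torch.nn.functional as F

    C, eps = 3, 1e-5
    bn = torch.nn.BatchNorm2d(C, eps=eps).eval()
    with torch.no_grad():
        bn.weight.copy_(torch.randn(C))
        bn.bias.copy_(torch.randn(C))
        bn.running_mean.copy_(torch.randn(C))
        bn.running_var.copy_(torch.rand(C) + 0.5)

    w = bn.weight / torch.sqrt(bn.running_var + eps)  # per-channel scale
    b = bn.bias - bn.running_mean * w                 # per-channel shift
    conv_w = w.reshape(C, 1, 1, 1)                    # (out_c, in_c/groups, kH, kW)

    x = torch.randn(2, C, 8, 8)
    y_conv = F.conv2d(x, conv_w, b, stride=1, padding=0, groups=C)
    print(torch.allclose(bn(x), y_conv, atol=1e-6))  # expected: True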

backends/xnnpack/test/tester/tester.py

Lines changed: 2 additions & 0 deletions
@@ -107,6 +107,7 @@ def __init__(
         module: torch.nn.Module,
         example_inputs: Tuple[torch.Tensor],
         dynamic_shapes: Optional[Tuple[Any]] = None,
+        **kwargs,
     ):
         # Specialize for XNNPACK
         stage_classes = (
@@ -127,4 +128,5 @@ def __init__(
             stage_classes=stage_classes,
             example_inputs=example_inputs,
             dynamic_shapes=dynamic_shapes,
+            **kwargs,
         )
