Fix DecomposeLayerNormPass to handle 6-arg layer_norm

Andrew Pullin · facebook-github-bot · commit cc936765e168 · 2026-01-09T09:44:01.000-08:00
Summary: ## Problem When using `nn.LayerNorm` in models that go through modai_sdk's `post_train_quantize` flow, the `DecomposeLayerNormPass` fails with: ``` ValueError: DecomposeLayerNormPass: too many values to unpack (expected 2) ``` This happens because `torch.ops.aten.layer_norm.default` has **6 arguments**: ``` layer_norm(input, normalized_shape, weight, bias, eps, cudnn_enable) ``` But `DecomposeLayerNormPass` only handled up to 5 arguments (for `native_layer_norm`). The error occurs during `transform_for_annotation_pipeline` in the ARM quantizer, which runs before edge transformation when the op is still `aten.layer_norm.default`. ## Solution Add `case 6:` to the `match len(args)` block in `DecomposeLayerNormPass.call()` to handle the 6th argument (`cudnn_enable`). This argument is simply ignored during decomposition since it's only relevant for cuDNN GPU optimization. ## Testing Added a new test file `test_layernorm_modai_compat.py` that: 1. Creates a simple Linear -> LayerNorm -> Linear model 2. Exports it via `torch.export` 3. Runs it through `transform_for_annotation_pipeline` (the exact path that was failing) 4. Verifies LayerNorm is decomposed correctly through the full TOSA pipelines --- > Generated by [Confucius Code Assist (CCA)](https://www.internalfb.com/wiki/Confucius/Analect/Shared_Analects/Confucius_Code_Assist_(CCA)/) [Confucius Session](https://www.internalfb.com/confucius?host=92481.od.fbinfra.net&port=8086&tab=Chat&session_id=eace3d92-ed78-11f0-b67c-c7843469b0d5&entry_name=Code+Assist), [Trace](https://www.internalfb.com/confucius?session_id=eace3d92-ed78-11f0-b67c-c7843469b0d5&tab=Trace) Differential Revision: D90395786
diff --git a/backends/arm/_passes/decompose_layernorm_pass.py b/backends/arm/_passes/decompose_layernorm_pass.py
@@ -90,6 +90,11 @@ def call(self, graph_module: torch.fx.GraphModule):
             args = node.args
             meta = node.meta
             match len(args):
+                case 6:
+                    # torch.ops.aten.layer_norm.default has 6 args:
+                    # (input, normalized_shape, weight, bias, eps, cudnn_enable)
+                    # cudnn_enable is not used in the decomposition
+                    x, normalized_shape, weights, bias, epsilon, _cudnn_enable = args
                 case 5:
                     x, normalized_shape, weights, bias, epsilon = args
                 case 4:
diff --git a/backends/arm/test/ops/test_layernorm_modai_compat.py b/backends/arm/test/ops/test_layernorm_modai_compat.py
@@ -0,0 +1,173 @@
+# Copyright 2025 Arm Limited and/or its affiliates.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+Test LayerNorm compatibility with modai_sdk lowering pipeline.
+
+This test verifies that nn.LayerNorm works correctly through the ARM backend's
+transform_for_annotation pipeline, which is used by modai_sdk during PTQ.
+
+The key issue was that torch.ops.aten.layer_norm.default has 6 arguments:
+(input, normalized_shape, weight, bias, eps, cudnn_enable)
+
+But DecomposeLayerNormPass only handled up to 5 args, causing a ValueError
+when the 6th arg (cudnn_enable) was present.
+
+Related: D88489694, T247846380
+"""
+
+from typing import Tuple
+
+import torch
+import torch.nn as nn
+from executorch.backends.arm._passes.arm_pass_manager import ArmPassManager
+from executorch.backends.arm.test import common
+from executorch.backends.arm.test.tester.test_pipeline import (
+    EthosU55PipelineINT,
+    TosaPipelineFP,
+    TosaPipelineINT,
+)
+from executorch.backends.arm.tosa.specification import TosaSpecification
+
+input_t = Tuple[torch.Tensor]
+
+
+class SimpleLayerNormModel(nn.Module):
+    """Simple model: Linear -> LayerNorm -> Linear"""
+
+    def __init__(self, hidden_dim: int = 32):
+        super().__init__()
+        self.linear1 = nn.Linear(hidden_dim, hidden_dim)
+        self.layer_norm = nn.LayerNorm(hidden_dim)
+        self.linear2 = nn.Linear(hidden_dim, hidden_dim)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        x = self.linear1(x)
+        x = self.layer_norm(x)
+        x = self.linear2(x)
+        return x
+
+
+class LayerNormWithPermute(nn.Module):
+    """
+    LayerNorm with permute pattern (channels first).
+
+    This pattern is common in models like HRNet where the data is in
+    (B, C, H, L) format and needs to be permuted for LayerNorm.
+    """
+
+    def __init__(self, num_channels: int = 16):
+        super().__init__()
+        self.layer_norm = nn.LayerNorm(num_channels)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # x is (B, C, H, L) - channels first
+        x = torch.permute(x, (0, 2, 3, 1))  # (B, C, H, L) -> (B, H, L, C)
+        x = self.layer_norm(x)
+        x = torch.permute(x, (0, 3, 1, 2))  # (B, H, L, C) -> (B, C, H, L)
+        return x
+
+
+test_data_suite = {
+    "simple_1d": lambda: (
+        (torch.randn(1, 32),),
+        SimpleLayerNormModel(hidden_dim=32),
+    ),
+    "simple_2d": lambda: (
+        (torch.randn(1, 10, 32),),
+        SimpleLayerNormModel(hidden_dim=32),
+    ),
+    "with_permute": lambda: (
+        (torch.randn(1, 16, 4, 8),),
+        LayerNormWithPermute(num_channels=16),
+    ),
+}
+
+
+def test_layernorm_transform_for_annotation():
+    """
+    Test that LayerNorm works through transform_for_annotation pipeline.
+
+    This is the key test - it directly tests the pipeline that was failing
+    in modai_sdk when DecomposeLayerNormPass couldn't handle 6 args.
+    """
+    model = SimpleLayerNormModel(hidden_dim=16).eval()
+    sample_input = (torch.randn(1, 16),)
+
+    # Export the model
+    exported_program = torch.export.export(model, sample_input)
+    graph_module = exported_program.graph_module
+
+    # Debug: Print out what layer_norm nodes look like
+    print("\n=== Exported graph nodes ===")
+    for node in graph_module.graph.nodes:
+        if "layer_norm" in str(node.target):
+            print(f"Node: {node.name}")
+            print(f"  Target: {node.target}")
+            print(f"  Args count: {len(node.args)}")
+            print(f"  Args: {node.args}")
+            print(f"  Kwargs: {node.kwargs}")
+    print("=== End of layer_norm nodes ===\n")
+
+    # Create ArmPassManager with proper compile spec (similar to what modai_sdk does)
+    # ArmPassManager expects an ArmCompileSpec, not TosaSpecification directly
+    from executorch.backends.arm.test import common as ethos_common
+
+    compile_spec = ethos_common.get_tosa_compile_spec("TOSA-1.00+INT+FP")
+    pass_manager = ArmPassManager(compile_spec)
+
+    # This is the call that was failing before the fix
+    # It runs DecomposeLayerNormPass among other passes
+    try:
+        result = pass_manager.transform_for_annotation_pipeline(
+            graph_module=graph_module
+        )
+        assert result is not None, "transform_for_annotation_pipeline returned None"
+    except ValueError as e:
+        if "too many values to unpack" in str(e):
+            raise AssertionError(
+                f"DecomposeLayerNormPass failed to handle layer_norm args: {e}"
+            ) from e
+        raise
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_layernorm_tosa_FP(test_data):
+    """Test LayerNorm in TOSA FP pipeline."""
+    test_data, model = test_data()
+    pipeline = TosaPipelineFP[input_t](
+        model,
+        test_data,
+        "torch.ops.aten.layer_norm.default",
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+def test_layernorm_tosa_INT(test_data):
+    """Test LayerNorm in TOSA INT (quantized) pipeline."""
+    test_data, model = test_data()
+    pipeline = TosaPipelineINT[input_t](
+        model,
+        test_data,
+        # After decomposition, check for sub op which is part of layernorm
+        "torch.ops.aten.sub.Tensor",
+        symmetric_io_quantization=True,
+    )
+    pipeline.run()
+
+
+@common.parametrize("test_data", test_data_suite)
+@common.XfailIfNoCorstone300
+def test_layernorm_u55_INT(test_data):
+    """Test LayerNorm in Ethos U55 INT pipeline."""
+    test_data, model = test_data()
+    pipeline = EthosU55PipelineINT[input_t](
+        model,
+        test_data,
+        "torch.ops.aten.sub.Tensor",
+        symmetric_io_quantization=True,
+    )
+    pipeline.run()
diff --git a/backends/arm/test/targets.bzl b/backends/arm/test/targets.bzl
@@ -19,7 +19,8 @@ def define_arm_tests():
         "ops/test_avg_pool2d.py",
         "ops/test_cat.py",
         "ops/test_conv2d.py",
-        "ops/test_linear.py", 
+        "ops/test_layernorm_modai_compat.py",
+        "ops/test_linear.py",
         "ops/test_mul.py",
         "ops/test_permute.py",
         "ops/test_rsqrt.py",