
Commit 71948db

Allow folding of an add into a layernorm if the bias is zero. Add missing shape check
Signed-off-by: Rickert, Jonas <[email protected]>
Parent: dbd0d83

2 files changed: 23 additions, 1 deletion

src/Dialect/ONNX/ONNXOps/Canonicalize.cpp

Lines changed: 9 additions & 1 deletion
@@ -1746,8 +1746,16 @@ struct PropagateBiasIntoLayerNormRewritePattern
     if (!yLayerNormOp->hasOneUse())
       return reportFailure("y/layer norm has too many uses");
     auto lnOp = mlir::cast<OP_TYPE>(yLayerNormOp);
-    if (!onnx_mlir::isNoneValue(lnOp.getB()))
+    if (!isValueNoneOrConstZero(lnOp.getB()))
       return reportFailure("layer norm already has a bias");
+
+    // Norms only support unidirectional broadcasting from bias to y
+    const auto yType = dyn_cast<ShapedType>(y.getType());
+    const auto addType = dyn_cast<ShapedType>(addOp.getType());
+    if (!yType || !addType || !yType.hasStaticShape() ||
+        !addType.hasStaticShape() || yType.getShape() != addType.getShape()) {
+      return rewriter.notifyMatchFailure(addOp, "incompatible shapes");
+    }
     // We are fine.
     Value x = lnOp.getX();
     Value scale = lnOp.getScale();
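
The new check calls isValueNoneOrConstZero, whose definition is not part of this hunk. As a rough sketch of what such a helper could look like (an assumption, not the code added by this commit), it only needs to accept either a missing bias or a constant all-zero splat:

// Sketch only; assumes mlir/IR/Matchers.h is included for matchPattern /
// m_AnyZeroFloat, and onnx_mlir::isNoneValue is available as elsewhere in
// Canonicalize.cpp.
static bool isValueNoneOrConstZero(mlir::Value val) {
  // A "none" bias behaves exactly like a zero bias.
  if (onnx_mlir::isNoneValue(val))
    return true;
  // Otherwise accept a constant floating-point splat of +0.0 or -0.0.
  return mlir::matchPattern(val, mlir::m_AnyZeroFloat());
}

Treating a zero-constant B the same as a missing B is what allows the trailing onnx.Add to be folded into the norm's bias operand.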

test/mlir/onnx/onnx_canonicalization.mlir

Lines changed: 14 additions & 0 deletions
@@ -2392,6 +2392,20 @@ func.func @layernorm_without_bias(%arg0: tensor<1x384x768xf32>, %arg1: tensor<76
 
 // -----
 
+func.func @layernorm_with_zero_bias(%arg0: tensor<1x384x768xf32>, %arg1: tensor<768xf32>, %bias: tensor<768xf32>) -> tensor<1x384x768xf32> {
+  %0 = onnx.Constant dense<0.000000e+00> : tensor<768xf32>
+  %NormScaled, %Mean, %InvStdDev = "onnx.LayerNormalization"(%arg0, %arg1, %0) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, tensor<768xf32>) -> (tensor<1x384x768xf32>, none, none)
+  %Y = "onnx.Add"(%bias, %NormScaled) : (tensor<768xf32>, tensor<1x384x768xf32>) -> tensor<1x384x768xf32>
+  return %Y : tensor<1x384x768xf32>
+// CHECK-LABEL: func.func @layernorm_with_zero_bias
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x384x768xf32>, [[PARAM_1_:%.+]]: tensor<768xf32>, [[PARAM_2_:%.+]]: tensor<768xf32>) -> tensor<1x384x768xf32> {
+// CHECK: [[Y_:%.+]], [[Mean_:%.+]], [[VAR_InvStdDev_:%.+]] = "onnx.LayerNormalization"([[PARAM_0_]], [[PARAM_1_]], [[PARAM_2_]]) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, tensor<768xf32>) -> (tensor<1x384x768xf32>, none, none)
+// CHECK: return [[Y_]] : tensor<1x384x768xf32>
+// CHECK: }
+}
+
+// -----
+
 // Recognize the bias and fold into RMSLayerNorm.
 func.func @rmslayernorm_without_bias(%arg0: tensor<1x384x768xf32>, %arg1: tensor<768xf32>, %bias: tensor<768xf32>) -> tensor<1x384x768xf32> {
   %0 = "onnx.NoValue"() {value} : () -> none
