Commit dbd0d83

Add canonicalization pattern to move mul into (RMS)Layernorm
Signed-off-by: Rickert, Jonas <[email protected]>
1 parent a364c43 commit dbd0d83
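The new pattern folds a trailing Mul into the scale operand of a LayerNormalization or RMSLayerNormalization whose existing scale is the splat constant 1.0 and whose bias is none or zero, provided the norm result has a single use and the shapes match. A minimal before/after sketch of the rewrite (value names and types are illustrative, mirroring the layernorm_with_neutral_scale test added below):

// Before: neutral scale (splat 1.0), no bias, result consumed by a Mul.
%none = "onnx.NoValue"() {value} : () -> none
%one = onnx.Constant dense<1.0> : tensor<768xf32>
%y, %mean, %isd = "onnx.LayerNormalization"(%x, %one, %none) {axis = 2 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none, none)
%out = "onnx.Mul"(%scale, %y) : (tensor<768xf32>, tensor<1x384x768xf32>) -> tensor<1x384x768xf32>

// After: %scale becomes the norm's scale operand and the Mul disappears.
%y, %mean, %isd = "onnx.LayerNormalization"(%x, %scale, %none) {axis = 2 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none, none)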

2 files changed: +176 -0 lines changed
src/Dialect/ONNX/ONNXOps/Canonicalize.cpp

Lines changed: 88 additions & 0 deletions
@@ -1633,6 +1633,89 @@ struct RecomposeConcatPattern : public OpRewritePattern<ONNXConcatOp> {
// =============================================================================
// Rewrite pattern LayerNormalization
// =============================================================================
namespace {
bool isValueNoneOrConstZero(Value value) {
  if (!value) {
    return false;
  }
  if (isNoneValue(value)) {
    return true;
  }
  auto elementsAttr = getElementAttributeFromONNXValue(value);
  if (!elementsAttr) {
    return false;
  }
  if (!elementsAttr.isSplat()) {
    return false;
  }
  if (!elementsAttr.template getSplatValue<APFloat>().isZero()) {
    return false;
  }
  return true;
}
} // namespace

template <typename OP_TYPE>
struct PropagateScaleIntoLayerNormPattern : public OpRewritePattern<ONNXMulOp> {
  using OpRewritePattern<ONNXMulOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(
      ONNXMulOp mulOp, PatternRewriter &rewriter) const final {
    using namespace onnx_mlir;
    Value y;
    Value mulScale;
    Operation *yLayerNormOp;
    // Match
    // %neutral = "onnx.Constant" {1.0}
    // %y, %mean, %invStdDev = "onnx.LayerNormalization"(%x, %neutral, %noBias)
    // %yScale = "onnx.Mul"(%y, %mulScale)
    if (!onnx_mlir::operandOfOpDefinedBy<OP_TYPE>(
            yLayerNormOp, mulOp, y, mulScale, 0) &&
        !onnx_mlir::operandOfOpDefinedBy<OP_TYPE>(
            yLayerNormOp, mulOp, mulScale, y, 1)) {
      return rewriter.notifyMatchFailure(mulOp, "missing y, layer norm op");
    }
    if (!yLayerNormOp->hasOneUse()) {
      return rewriter.notifyMatchFailure(
          mulOp, "y/layer norm has too many uses");
    }
    OP_TYPE normOp = cast<OP_TYPE>(yLayerNormOp);
    // Bias needs to be zero
    if (!isValueNoneOrConstZero(normOp.getB())) {
      return rewriter.notifyMatchFailure(
          mulOp, "layer norm already has a bias");
    }

    auto existingScale = normOp.getScale();
    auto elementsAttr = getElementAttributeFromONNXValue(existingScale);
    if (!elementsAttr) {
      return rewriter.notifyMatchFailure(
          mulOp, "missing elements attribute or scale is not const");
    }
    if (!elementsAttr.isSplat()) {
      return rewriter.notifyMatchFailure(mulOp, "scale is not a splat value");
    }
    if (!elementsAttr.template getSplatValue<APFloat>().isExactlyValue(1.0)) {
      return rewriter.notifyMatchFailure(mulOp, "scale is not 1.0");
    }
    // Norms only support unidirectional broadcasting from scale to y
    const auto yType = dyn_cast<ShapedType>(y.getType());
    const auto mulType = dyn_cast<ShapedType>(mulOp.getType());
    if (!yType || !mulType || !yType.hasStaticShape() ||
        !mulType.hasStaticShape() || yType.getShape() != mulType.getShape()) {
      return rewriter.notifyMatchFailure(mulOp, "incompatible shapes");
    }

    rewriter.moveOpAfter(
        normOp, mulOp); // Make sure we can use the const of the mul
    rewriter.modifyOpInPlace(normOp, [&] {
      normOp.setOperand(/*scale*/ 1, mulScale);
      normOp->setLoc(rewriter.getFusedLoc({normOp.getLoc(), mulOp->getLoc()}));
    });
    rewriter.replaceOp(mulOp, normOp.getY());
    return success();
  }
};

template <typename OP_TYPE>
struct PropagateBiasIntoLayerNormRewritePattern
@@ -2189,6 +2272,11 @@ void ONNXAddOp::getCanonicalizationPatterns(
  results.insert<FuseAddConvNullBiasPattern>(context);
  results.insert<BinaryOpBroadcastAxisPattern<ONNXAddOp>>(context);
  results.insert<PropagateScalarConstantExpandPattern<ONNXAddOp>>(context);
  results.insert<PropagateScaleIntoLayerNormPattern<ONNXLayerNormalizationOp>>(
      context);
  results
      .insert<PropagateScaleIntoLayerNormPattern<ONNXRMSLayerNormalizationOp>>(
          context);
  results.insert<
      PropagateBiasIntoLayerNormRewritePattern<ONNXLayerNormalizationOp>>(
      context);

test/mlir/onnx/onnx_canonicalization.mlir

Lines changed: 88 additions & 0 deletions
@@ -2405,4 +2405,92 @@ func.func @rmslayernorm_without_bias(%arg0: tensor<1x384x768xf32>, %arg1: tensor
// CHECK: }
}

// -----

// Recognize the scale and fold into LayerNorm.
func.func @layernorm_with_neutral_scale(%arg0: tensor<1x384x768xf32>, %arg1: tensor<768xf32>, %mulVal: tensor<768xf32>) -> tensor<1x384x768xf32> {
  %0 = "onnx.NoValue"() {value} : () -> none
  %1 = onnx.Constant dense<1.000000e+00> : tensor<768xf32>
  %NormScaled, %Mean, %InvStdDev = "onnx.LayerNormalization"(%arg0, %1, %0) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none, none)
  %Y = "onnx.Mul"(%mulVal, %NormScaled) : (tensor<768xf32>, tensor<1x384x768xf32>) -> tensor<1x384x768xf32>
  return %Y : tensor<1x384x768xf32>
// CHECK-LABEL: func.func @layernorm_with_neutral_scale
// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x384x768xf32>, [[PARAM_1_:%.+]]: tensor<768xf32>, [[PARAM_2_:%.+]]: tensor<768xf32>) -> tensor<1x384x768xf32> {
// CHECK: [[VAR_0_:%.+]] = "onnx.NoValue"() {value} : () -> none
// CHECK: [[VAR_Y_:%.+]], [[VAR_Mean_:%.+]], [[VAR_InvStdDev_:%.+]] = "onnx.LayerNormalization"([[PARAM_0_]], [[PARAM_2_]], [[VAR_0_]]) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none, none)
// CHECK: return [[VAR_Y_]] : tensor<1x384x768xf32>
// CHECK: }
}

// -----

func.func @layernorm_scale_not_one(%arg0: tensor<1x384x768xf32>, %arg1: tensor<768xf32>, %mulVal: tensor<768xf32>) -> tensor<1x384x768xf32> {
  %0 = "onnx.NoValue"() {value} : () -> none
  %1 = onnx.Constant dense<1.100000e+00> : tensor<768xf32>
  %NormScaled, %Mean, %InvStdDev = "onnx.LayerNormalization"(%arg0, %1, %0) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none, none)
  %Y = "onnx.Mul"(%mulVal, %NormScaled) : (tensor<768xf32>, tensor<1x384x768xf32>) -> tensor<1x384x768xf32>
  return %Y : tensor<1x384x768xf32>
// CHECK-LABEL: func.func @layernorm_scale_not_one
// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x384x768xf32>, [[PARAM_1_:%.+]]: tensor<768xf32>, [[PARAM_2_:%.+]]: tensor<768xf32>) -> tensor<1x384x768xf32> {
// CHECK-DAG: [[VAR_0_:%.+]] = "onnx.NoValue"() {value} : () -> none
// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<1.100000e+00> : tensor<768xf32>
// CHECK: [[VAR_Y_:%.+]], [[VAR_Mean_:%.+]], [[VAR_InvStdDev_:%.+]] = "onnx.LayerNormalization"([[PARAM_0_]], [[VAR_1_]], [[VAR_0_]]) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none, none)
// CHECK: [[VAR_2_:%.+]] = "onnx.Mul"([[PARAM_2_]], [[VAR_Y_]]) : (tensor<768xf32>, tensor<1x384x768xf32>) -> tensor<1x384x768xf32>
// CHECK: return [[VAR_2_]] : tensor<1x384x768xf32>
// CHECK: }
}

// -----

func.func @layernorm_bias_not_zero(%arg0: tensor<1x384x768xf32>, %arg1: tensor<768xf32>, %mulVal: tensor<768xf32>) -> tensor<1x384x768xf32> {
  %0 = onnx.Constant dense<2.000000e+00> : tensor<768xf32>
  %1 = onnx.Constant dense<1.000000e+00> : tensor<768xf32>
  %NormScaled, %Mean, %InvStdDev = "onnx.LayerNormalization"(%arg0, %1, %0) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, tensor<768xf32>) -> (tensor<1x384x768xf32>, none, none)
  %Y = "onnx.Mul"(%mulVal, %NormScaled) : (tensor<768xf32>, tensor<1x384x768xf32>) -> tensor<1x384x768xf32>
  return %Y : tensor<1x384x768xf32>
// CHECK-LABEL: func.func @layernorm_bias_not_zero
// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x384x768xf32>, [[PARAM_1_:%.+]]: tensor<768xf32>, [[PARAM_2_:%.+]]: tensor<768xf32>) -> tensor<1x384x768xf32> {
// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<2.000000e+00> : tensor<768xf32>
// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<1.000000e+00> : tensor<768xf32>
// CHECK: [[VAR_Y_:%.+]], [[VAR_Mean_:%.+]], [[VAR_InvStdDev_:%.+]] = "onnx.LayerNormalization"([[PARAM_0_]], [[VAR_1_]], [[VAR_0_]]) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, tensor<768xf32>) -> (tensor<1x384x768xf32>, none, none)
// CHECK: [[VAR_2_:%.+]] = "onnx.Mul"([[PARAM_2_]], [[VAR_Y_]]) : (tensor<768xf32>, tensor<1x384x768xf32>) -> tensor<1x384x768xf32>
// CHECK: return [[VAR_2_]] : tensor<1x384x768xf32>
// CHECK: }
}

// -----

func.func @layernorm_broadcast(%arg0: tensor<1x384x768xf32>, %arg1: tensor<768xf32>, %mulVal: tensor<10x384x768xf32>) -> tensor<10x384x768xf32> {
  %0 = "onnx.NoValue"() {value} : () -> none
  %1 = onnx.Constant dense<1.000000e+00> : tensor<768xf32>
  %NormScaled, %Mean, %InvStdDev = "onnx.LayerNormalization"(%arg0, %1, %0) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none, none)
  %Y = "onnx.Mul"(%mulVal, %NormScaled) : (tensor<10x384x768xf32>, tensor<1x384x768xf32>) -> tensor<10x384x768xf32>
  return %Y : tensor<10x384x768xf32>
// CHECK-LABEL: func.func @layernorm_broadcast
// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x384x768xf32>, [[PARAM_1_:%.+]]: tensor<768xf32>, [[PARAM_2_:%.+]]: tensor<10x384x768xf32>) -> tensor<10x384x768xf32> {
// CHECK-DAG: [[VAR_0_:%.+]] = "onnx.NoValue"() {value} : () -> none
// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<1.000000e+00> : tensor<768xf32>
// CHECK: [[VAR_Y_:%.+]], [[VAR_Mean_:%.+]], [[VAR_InvStdDev_:%.+]] = "onnx.LayerNormalization"([[PARAM_0_]], [[VAR_1_]], [[VAR_0_]]) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none, none)
// CHECK: [[VAR_2_:%.+]] = "onnx.Mul"([[PARAM_2_]], [[VAR_Y_]]) : (tensor<10x384x768xf32>, tensor<1x384x768xf32>) -> tensor<10x384x768xf32>
// CHECK: return [[VAR_2_]] : tensor<10x384x768xf32>
// CHECK: }
}

// -----

// Recognize the scale and fold into the RMSNorm.
func.func @rmslayernorm_with_neutral_scale(%arg0: tensor<1x384x768xf32>, %arg1: tensor<768xf32>, %mulVal: tensor<768xf32>) -> tensor<1x384x768xf32> {
  %0 = "onnx.NoValue"() {value} : () -> none
  %1 = onnx.Constant dense<1.000000e+00> : tensor<768xf32>
  %NormScaled, %InvStdDev = "onnx.RMSLayerNormalization"(%arg0, %1, %0) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none)
  %Y = "onnx.Mul"(%mulVal, %NormScaled) : (tensor<768xf32>, tensor<1x384x768xf32>) -> tensor<1x384x768xf32>
  return %Y : tensor<1x384x768xf32>
// CHECK-LABEL: func.func @rmslayernorm_with_neutral_scale
// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x384x768xf32>, [[PARAM_1_:%.+]]: tensor<768xf32>, [[PARAM_2_:%.+]]: tensor<768xf32>) -> tensor<1x384x768xf32> {
// CHECK: [[VAR_0_:%.+]] = "onnx.NoValue"() {value} : () -> none
// CHECK: [[VAR_Y_:%.+]], [[VAR_InvStdDev_:%.+]] = "onnx.RMSLayerNormalization"([[PARAM_0_]], [[PARAM_2_]], [[VAR_0_]]) {axis = 2 : si64, epsilon = 1.200000e+00 : f32, stash_type = 1 : si64} : (tensor<1x384x768xf32>, tensor<768xf32>, none) -> (tensor<1x384x768xf32>, none)
// CHECK: return [[VAR_Y_]] : tensor<1x384x768xf32>
// CHECK: }
}
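For context, the hunk does not show the test file's RUN line; these FileCheck tests are driven through lit and the ONNX canonicalizer. A plausible invocation, assuming the standard onnx-mlir-opt driver (the exact passes and flags used by onnx_canonicalization.mlir may differ), would be:

// RUN: onnx-mlir-opt --canonicalize --split-input-file %s | FileCheck %s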
