Commit 0019e50

Add decomposition of SkipSimplifiedLayerNormalization
Signed-off-by: Rickert, Jonas <[email protected]>
1 parent bccacea commit 0019e50
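
The decomposition rewrites the com.microsoft SkipSimplifiedLayerNormalization custom op as an explicit input + skip (+ bias) addition followed by a plain SimplifiedLayerNormalization, which existing patterns can lower further. As a reference for the intended math, here is a minimal sketch assuming the usual contrib-op semantics (RMS normalization over the last axis); the helper name and per-row formulation are illustrative only and are not part of the commit:

// Reference-only sketch (assumed semantics, not code from this commit):
//   sum = input + skip (+ bias)
//   y   = sum / sqrt(mean(sum^2) + epsilon) * gamma
#include <cmath>
#include <cstddef>
#include <vector>

std::vector<float> skipSimplifiedLayerNormRow(const std::vector<float> &input,
    const std::vector<float> &skip, const std::vector<float> &gamma,
    const std::vector<float> *bias, float epsilon) {
  const std::size_t hidden = input.size();
  std::vector<float> sum(hidden), y(hidden);
  float meanSquare = 0.0f;
  for (std::size_t i = 0; i < hidden; ++i) {
    // Sum the input, the skip connection, and the optional pre-norm bias.
    sum[i] = input[i] + skip[i] + (bias ? (*bias)[i] : 0.0f);
    meanSquare += sum[i] * sum[i];
  }
  meanSquare /= static_cast<float>(hidden);
  const float invStdDev = 1.0f / std::sqrt(meanSquare + epsilon);
  for (std::size_t i = 0; i < hidden; ++i)
    y[i] = sum[i] * invStdDev * gamma[i]; // normalized sum scaled by gamma
  return y;
}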

File tree

2 files changed, +176 -0 lines changed

src/Dialect/ONNX/Transforms/Decompose.cpp

Lines changed: 84 additions & 0 deletions
@@ -3182,6 +3182,89 @@ struct MicrosoftSkipLayerNorm : public CustomOpToOnnxOps {
   }
 };
 
+struct MicrosoftSkipSimplifiedLayerNorm : public CustomOpToOnnxOps {
+  MicrosoftSkipSimplifiedLayerNorm(MLIRContext *ctx, PatternBenefit b = 1)
+      : CustomOpToOnnxOps(
+            ctx, MicrosoftDomainName, "SkipSimplifiedLayerNormalization", b) {}
+
+  LogicalResult matchAndRewriteImpl(
+      ONNXCustomOp customOp, PatternRewriter &rewriter) const final {
+    using namespace onnx_mlir;
+    Location loc = customOp.getLoc();
+    const int64_t numIn = customOp.getNumOperands();
+    assert((numIn >= 3 && numIn <= 4) && "expects 3..4 inputs");
+    const int64_t numOut = customOp.getNumResults();
+    assert((numOut >= 1 && numOut <= 4) && "expects 1..4 outputs");
+
+    MultiDialectBuilder<OnnxBuilder> create(rewriter, customOp->getLoc());
+
+    Value none = create.onnx.none();
+
+    Value input = customOp.getOperand(0);
+    Value skip = customOp.getOperand(1);
+    Value gamma = customOp.getOperand(2);
+    Value bias; // pre-norm bias
+
+    if (numIn >= 4)
+      bias = customOp.getOperand(3);
+
+    auto epsAttr = customOp->getAttrOfType<FloatAttr>("epsilon");
+    assert(epsAttr && "Expected Epsilon");
+
+    Value skipAdd = create.onnx.add(input, skip);
+    Value sumIS;
+    if (bias) {
+      sumIS = create.onnx.add(skipAdd, bias);
+    } else {
+      sumIS = skipAdd;
+      skipAdd = nullptr;
+    }
+
+    SmallVector<Type, 3> resultTypes;
+    resultTypes.push_back(customOp->getResultTypes()[0]);
+    resultTypes.push_back(
+        numOut > 1 ? customOp->getResultTypes()[1] : rewriter.getNoneType());
+    resultTypes.push_back(
+        numOut > 2 ? customOp->getResultTypes()[2] : rewriter.getNoneType());
+
+    const auto si64Type = rewriter.getIntegerType(64, /*signed*/ true);
+
+    const SmallVector<NamedAttribute, 5> simplifiedLayerNormAttrs{
+        rewriter.getNamedAttr(
+            "domain_name", rewriter.getStringAttr(DefaultONNXDomainName)),
+        rewriter.getNamedAttr("function_name",
+            rewriter.getStringAttr("SimplifiedLayerNormalization")),
+        rewriter.getNamedAttr("axis", rewriter.getIntegerAttr(si64Type, -1)),
+        rewriter.getNamedAttr("epsilon", epsAttr),
+        rewriter.getNamedAttr(
+            "stash_type", rewriter.getIntegerAttr(si64Type, 1))};
+
+    auto skipLayerNorm = rewriter.create<ONNXCustomOp>(
+        loc, resultTypes, ValueRange{sumIS, gamma}, simplifiedLayerNormAttrs);
+
+    SmallVector<Value, 4> replace;
+    replace.push_back(skipLayerNorm.getResult(0));
+    if (numOut >= 2)
+      replace.push_back(skipLayerNorm.getResult(1)); // mean
+    if (numOut >= 3)
+      replace.push_back(skipLayerNorm.getResult(2)); // inv_std_var
+    if (numOut == 4)
+      replace.push_back(sumIS); // input_skip_bias_sum
+
+    SmallVector<Value, 7> toCheck(replace.begin(), replace.end());
+    toCheck.push_back(none);
+    toCheck.push_back(skipAdd);
+    toCheck.push_back(sumIS);
+
+    if (failed(verifyOpsErasingOnError(toCheck, rewriter))) {
+      return rewriter.notifyMatchFailure(customOp, "Failed verification");
+    }
+
+    rewriter.replaceOp(customOp, replace);
+    return success();
+  }
+};
+
 template <typename OpToCreate>
 struct CustomOpMicrosoftToSingleOnnxOp : public CustomOpToOnnxOps {
   CustomOpMicrosoftToSingleOnnxOp(MLIRContext *context,
@@ -3594,6 +3677,7 @@ void onnx_mlir::getDecomposeONNXToONNXPatterns(
   patterns.insert<MicrosoftFusedConv>(context);
   patterns.insert<MicrosoftSkipLayerNorm>(context);
   patterns.insert<SimplifiedLayerNorm>(context);
+  patterns.insert<MicrosoftSkipSimplifiedLayerNorm>(context);
   patterns.insert<DecomposeSlicePadPattern>(context);
   patterns.insert<DecomposeScatterNDPattern>(context);
   patterns.insert<SoftmaxCrossEntropyPattern>(context);
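
Note that the new pattern only materializes the add(s) and a SimplifiedLayerNormalization custom op. As the updated lit tests below show, cases that do not use the mean output are then lowered further by the existing SimplifiedLayerNorm pattern to onnx.RMSLayerNormalization, while cases that request the mean keep the intermediate SimplifiedLayerNormalization custom op.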

test/mlir/onnx/onnx_decompose_customop.mlir

Lines changed: 92 additions & 0 deletions
@@ -514,4 +514,96 @@ func.func @simplified_layernorm_two_outputs_mean_used(%input: tensor<2x4x8xf32>,
 // CHECK: onnx.Return [[VAR_0_]]#0, [[VAR_0_]]#1 : tensor<2x4x8xf32>, tensor<2x4x1xf32>
 }
 
+// -----
+// SkipSimplifiedLayerNormalization: 3 inputs, 1 output
+
+func.func @skip_simplified_layernorm_basic(%input: tensor<2x4x8xf32>, %skip: tensor<2x4x8xf32>, %gamma: tensor<8xf32>) -> tensor<2x4x8xf32> {
+  %r = "onnx.Custom"(%input, %skip, %gamma) {domain_name = "com.microsoft", function_name = "SkipSimplifiedLayerNormalization", epsilon = 1.000000e-05 : f32} : (tensor<2x4x8xf32>, tensor<2x4x8xf32>, tensor<8xf32>) -> tensor<2x4x8xf32>
+  onnx.Return %r : tensor<2x4x8xf32>
+// CHECK-LABEL: func.func @skip_simplified_layernorm_basic
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<2x4x8xf32>, [[PARAM_1_:%.+]]: tensor<2x4x8xf32>, [[PARAM_2_:%.+]]: tensor<8xf32>) -> tensor<2x4x8xf32> {
+// CHECK-DAG: [[VAR_0_:%.+]] = "onnx.NoValue"() {value} : () -> none
+// CHECK-DAG: [[VAR_1_:%.+]] = "onnx.Add"([[PARAM_0_]], [[PARAM_1_]]) : (tensor<2x4x8xf32>, tensor<2x4x8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_Y_:%.+]], [[VAR_InvStdDev_:%.+]] = "onnx.RMSLayerNormalization"([[VAR_1_]], [[PARAM_2_]], [[VAR_0_]]) {axis = -1 : si64, epsilon = 9.99999974E-6 : f32, stash_type = 1 : si64} : (tensor<2x4x8xf32>, tensor<8xf32>, none) -> (tensor<2x4x8xf32>, none)
+// CHECK: onnx.Return [[VAR_Y_]] : tensor<2x4x8xf32>
+}
+
+
+// -----
+// SkipSimplifiedLayerNormalization: 4 inputs (bias), 1 output
+
+func.func @skip_simplified_layernorm_bias(%input: tensor<2x4x8xf32>, %skip: tensor<2x4x8xf32>, %gamma: tensor<8xf32>, %bias: tensor<8xf32>) -> tensor<2x4x8xf32> {
+  %r = "onnx.Custom"(%input, %skip, %gamma, %bias) {domain_name = "com.microsoft", function_name = "SkipSimplifiedLayerNormalization", epsilon = 1.000000e-05 : f32} : (tensor<2x4x8xf32>, tensor<2x4x8xf32>, tensor<8xf32>, tensor<8xf32>) -> tensor<2x4x8xf32>
+  onnx.Return %r : tensor<2x4x8xf32>
+// CHECK-LABEL: func.func @skip_simplified_layernorm_bias
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<2x4x8xf32>, [[PARAM_1_:%.+]]: tensor<2x4x8xf32>, [[PARAM_2_:%.+]]: tensor<8xf32>, [[PARAM_3_:%.+]]: tensor<8xf32>) -> tensor<2x4x8xf32> {
+// CHECK-DAG: [[VAR_1_:%.+]] = "onnx.Add"([[PARAM_0_]], [[PARAM_1_]]) : (tensor<2x4x8xf32>, tensor<2x4x8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_2_:%.+]] = "onnx.Add"([[VAR_1_]], [[PARAM_3_]]) : (tensor<2x4x8xf32>, tensor<8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_Y_:%.+]], [[VAR_InvStdDev_:%.+]] = "onnx.RMSLayerNormalization"([[VAR_2_]], [[PARAM_2_]], [[VAR_0_]]) {axis = -1 : si64, epsilon = 9.99999974E-6 : f32, stash_type = 1 : si64} : (tensor<2x4x8xf32>, tensor<8xf32>, none) -> (tensor<2x4x8xf32>, none)
+// CHECK: onnx.Return [[VAR_Y_]] : tensor<2x4x8xf32>
+}
+
+
+
+// -----
+// SkipSimplifiedLayerNormalization: 4 inputs, 2 outputs (output, mean)
+
+func.func @skip_simplified_layernorm_two_outputs(%input: tensor<2x4x8xf32>, %skip: tensor<2x4x8xf32>, %gamma: tensor<8xf32>, %bias: tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>) {
+  %r0, %r1 = "onnx.Custom"(%input, %skip, %gamma, %bias) {domain_name = "com.microsoft", function_name = "SkipSimplifiedLayerNormalization", epsilon = 1.000000e-05 : f32} : (tensor<2x4x8xf32>, tensor<2x4x8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>)
+  onnx.Return %r0, %r1 : tensor<2x4x8xf32>, tensor<2x4x1xf32>
+// CHECK-LABEL: func.func @skip_simplified_layernorm_two_outputs
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<2x4x8xf32>, [[PARAM_1_:%.+]]: tensor<2x4x8xf32>, [[PARAM_2_:%.+]]: tensor<8xf32>, [[PARAM_3_:%.+]]: tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>) {
+// CHECK: [[VAR_0_:%.+]] = "onnx.Add"([[PARAM_0_]], [[PARAM_1_]]) : (tensor<2x4x8xf32>, tensor<2x4x8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_1_:%.+]] = "onnx.Add"([[VAR_0_]], [[PARAM_3_]]) : (tensor<2x4x8xf32>, tensor<8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_2_:%.+]]:3 = "onnx.Custom"([[VAR_1_]], [[PARAM_2_]]) {axis = -1 : si64, domain_name = "", epsilon = 9.99999974E-6 : f32, function_name = "SimplifiedLayerNormalization", stash_type = 1 : si64} : (tensor<2x4x8xf32>, tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, none)
+// CHECK: onnx.Return [[VAR_2_]]#0, [[VAR_2_]]#1 : tensor<2x4x8xf32>, tensor<2x4x1xf32>
+// CHECK: }
+}
+
+
+// -----
+// SkipSimplifiedLayerNormalization: 4 inputs, 3 outputs (output, mean, inv_std_var)
+
+func.func @skip_simplified_layernorm_three_outputs(%input: tensor<2x4x8xf32>, %skip: tensor<2x4x8xf32>, %gamma: tensor<8xf32>, %bias: tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>) {
+  %r0, %r1, %r2 = "onnx.Custom"(%input, %skip, %gamma, %bias) {domain_name = "com.microsoft", function_name = "SkipSimplifiedLayerNormalization", epsilon = 1.000000e-05 : f32} : (tensor<2x4x8xf32>, tensor<2x4x8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>)
+  onnx.Return %r0, %r1, %r2 : tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>
+// CHECK-LABEL: func.func @skip_simplified_layernorm_three_outputs
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<2x4x8xf32>, [[PARAM_1_:%.+]]: tensor<2x4x8xf32>, [[PARAM_2_:%.+]]: tensor<8xf32>, [[PARAM_3_:%.+]]: tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>) {
+// CHECK: [[VAR_0_:%.+]] = "onnx.Add"([[PARAM_0_]], [[PARAM_1_]]) : (tensor<2x4x8xf32>, tensor<2x4x8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_1_:%.+]] = "onnx.Add"([[VAR_0_]], [[PARAM_3_]]) : (tensor<2x4x8xf32>, tensor<8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_2_:%.+]]:3 = "onnx.Custom"([[VAR_1_]], [[PARAM_2_]]) {axis = -1 : si64, domain_name = "", epsilon = 9.99999974E-6 : f32, function_name = "SimplifiedLayerNormalization", stash_type = 1 : si64} : (tensor<2x4x8xf32>, tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>)
+// CHECK: onnx.Return [[VAR_2_]]#0, [[VAR_2_]]#1, [[VAR_2_]]#2 : tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>
+}
+
+
+// -----
+// SkipSimplifiedLayerNormalization: 4 inputs, 4 outputs (output, mean, inv_std_var, sum)
+
+func.func @skip_simplified_layernorm_four_outputs(%input: tensor<2x4x8xf32>, %skip: tensor<2x4x8xf32>, %gamma: tensor<8xf32>, %bias: tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>, tensor<2x4x8xf32>) {
+  %r0, %r1, %r2, %r3 = "onnx.Custom"(%input, %skip, %gamma, %bias) {domain_name = "com.microsoft", function_name = "SkipSimplifiedLayerNormalization", epsilon = 1.000000e-05 : f32} : (tensor<2x4x8xf32>, tensor<2x4x8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>, tensor<2x4x8xf32>)
+  onnx.Return %r0, %r1, %r2, %r3 : tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>, tensor<2x4x8xf32>
+// CHECK-LABEL: func.func @skip_simplified_layernorm_four_outputs
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<2x4x8xf32>, [[PARAM_1_:%.+]]: tensor<2x4x8xf32>, [[PARAM_2_:%.+]]: tensor<8xf32>, [[PARAM_3_:%.+]]: tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>, tensor<2x4x8xf32>) {
+// CHECK: [[VAR_0_:%.+]] = "onnx.Add"([[PARAM_0_]], [[PARAM_1_]]) : (tensor<2x4x8xf32>, tensor<2x4x8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_1_:%.+]] = "onnx.Add"([[VAR_0_]], [[PARAM_3_]]) : (tensor<2x4x8xf32>, tensor<8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_2_:%.+]]:3 = "onnx.Custom"([[VAR_1_]], [[PARAM_2_]]) {axis = -1 : si64, domain_name = "", epsilon = 9.99999974E-6 : f32, function_name = "SimplifiedLayerNormalization", stash_type = 1 : si64} : (tensor<2x4x8xf32>, tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>)
+// CHECK: onnx.Return [[VAR_2_]]#0, [[VAR_2_]]#1, [[VAR_2_]]#2, [[VAR_1_]] : tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x1xf32>, tensor<2x4x8xf32>
+// CHECK: }
+}
+
+// -----
+// SkipSimplifiedLayerNormalization: 4 inputs, 4 outputs (output, mean, inv_std_var, sum), mean unused
+
+func.func @skip_simplified_layernorm_four_outputs_mean_unused(%input: tensor<2x4x8xf32>, %skip: tensor<2x4x8xf32>, %gamma: tensor<8xf32>, %bias: tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x8xf32>) {
+  %r0, %r1, %r2, %r3 = "onnx.Custom"(%input, %skip, %gamma, %bias) {domain_name = "com.microsoft", function_name = "SkipSimplifiedLayerNormalization", epsilon = 1.000000e-05 : f32} : (tensor<2x4x8xf32>, tensor<2x4x8xf32>, tensor<8xf32>, tensor<8xf32>) -> (tensor<2x4x8xf32>, none, tensor<2x4x1xf32>, tensor<2x4x8xf32>)
+  onnx.Return %r0, %r2, %r3 : tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x8xf32>
+// CHECK-LABEL: func.func @skip_simplified_layernorm_four_outputs_mean_unused
+// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<2x4x8xf32>, [[PARAM_1_:%.+]]: tensor<2x4x8xf32>, [[PARAM_2_:%.+]]: tensor<8xf32>, [[PARAM_3_:%.+]]: tensor<8xf32>) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x8xf32>) {
+// CHECK-DAG: [[VAR_0_:%.+]] = "onnx.NoValue"() {value} : () -> none
+// CHECK-DAG: [[VAR_1_:%.+]] = "onnx.Add"([[PARAM_0_]], [[PARAM_1_]]) : (tensor<2x4x8xf32>, tensor<2x4x8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_2_:%.+]] = "onnx.Add"([[VAR_1_]], [[PARAM_3_]]) : (tensor<2x4x8xf32>, tensor<8xf32>) -> tensor<2x4x8xf32>
+// CHECK: [[VAR_Y_:%.+]], [[VAR_InvStdDev_:%.+]] = "onnx.RMSLayerNormalization"([[VAR_2_]], [[PARAM_2_]], [[VAR_0_]]) {axis = -1 : si64, epsilon = 9.99999974E-6 : f32, stash_type = 1 : si64} : (tensor<2x4x8xf32>, tensor<8xf32>, none) -> (tensor<2x4x8xf32>, tensor<2x4x1xf32>)
+// CHECK: onnx.Return [[VAR_Y_]], [[VAR_InvStdDev_]], [[VAR_2_]] : tensor<2x4x8xf32>, tensor<2x4x1xf32>, tensor<2x4x8xf32>
+}
+
 
