[WebAssembly] Add patterns for relaxed madd (#147487)

badumbatish · web-flow · commit 6640b0a293c0 · 2025-07-15T00:56:28.000+08:00
[WebAssembly] Fold `fadd contract (fmul contract ...)` into relaxed madd when compiling with -mattr=+simd128,+relaxed-simd. Fixes #121311. Changes in this commit: (1) add a precommit test for #121311; (2) fold `fadd contract (fmul contract ...)` into relaxed madd under -mattr=+simd128,+relaxed-simd; (3) move the `fadd_contract` PatFrag definitions from the ARM and WebAssembly .td files into TargetSelectionDAG.td so the pattern can be reused across targets.
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -1136,6 +1136,18 @@ def immAllOnesV  : SDPatternOperator; // ISD::isConstantSplatVectorAllOnes
 def immAllZerosV : SDPatternOperator; // ISD::isConstantSplatVectorAllZeros
 
 // Other helper fragments.
+
+// An 'fmul' node whose flags allow floating-point contraction (the 'contract' flag is set).
+def fmul_contract : PatFrag<(ops node:$a, node:$b), (fmul node:$a, node:$b),[{
+  return N->getFlags().hasAllowContract();
+}]>;
+
+// An 'fadd' node with the 'contract' flag set; combined with fmul_contract it can be folded into an fma or a relaxed multiply-add instruction.
+def fadd_contract : PatFrag<(ops node:$a, node:$b), (fadd node:$a, node:$b),[{
+  return N->getFlags().hasAllowContract();
+}]>;
+
+
 def not  : PatFrag<(ops node:$in), (xor node:$in, -1)>;
 def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>;
 def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -486,11 +486,6 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
   return hasNoVMLxHazardUse(N);
 }]>;
 
-// An 'fadd' node which can be contracted into a fma
-def fadd_contract : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
-  return N->getFlags().hasAllowContract();
-}]>;
-
 def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
 def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
 
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -1536,6 +1536,10 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
              (vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
            vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
            vec.prefix#".relaxed_nmadd", simdopS, reqs>;
+
+  // relaxed_madd(a, b, c) computes a * b + c, so the fmul operands bind to $a/$b and the addend to $c.
+  def : Pat<(fadd_contract (fmul_contract (vec.vt V128:$a), (vec.vt V128:$b)), (vec.vt V128:$c)),
+             (!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
 }
 
 defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fma.ll
@@ -0,0 +1,268 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers  -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers  -mattr=+fp16,+simd128,              | FileCheck %s --check-prefix=STRICT
+
+target triple = "wasm32"
+
+define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
+; RELAXED-LABEL: fadd_fmul_contract_f64:
+; RELAXED:         .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f64.mul $push0=, $1, $0
+; RELAXED-NEXT:    f64.add $push1=, $pop0, $2
+; RELAXED-NEXT:    return $pop1
+;
+; STRICT-LABEL: fadd_fmul_contract_f64:
+; STRICT:         .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f64.mul $push0=, $1, $0
+; STRICT-NEXT:    f64.add $push1=, $pop0, $2
+; STRICT-NEXT:    return $pop1
+  %mul = fmul contract double %b, %a
+  %add = fadd contract double %mul, %c
+  ret double %add
+}
+
+define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; RELAXED-LABEL: fadd_fmul_contract_4xf32:
+; RELAXED:         .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $1, $0, $2
+; RELAXED-NEXT:    return $pop0
+;
+; STRICT-LABEL: fadd_fmul_contract_4xf32:
+; STRICT:         .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f32x4.mul $push0=, $1, $0
+; STRICT-NEXT:    f32x4.add $push1=, $pop0, $2
+; STRICT-NEXT:    return $pop1
+  %mul = fmul contract <4 x float> %b, %a
+  %add = fadd contract <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+; relaxed_madd(x, y, z) computes x * y + z, so the fmul operands (%b, %a) come first and %c last.
+define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fadd_fmul_contract_8xf16:
+; RELAXED:         .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f16x8.relaxed_madd $push0=, $1, $0, $2
+; RELAXED-NEXT:    return $pop0
+;
+; STRICT-LABEL: fadd_fmul_contract_8xf16:
+; STRICT:         .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f16x8.mul $push0=, $1, $0
+; STRICT-NEXT:    f16x8.add $push1=, $pop0, $2
+; STRICT-NEXT:    return $pop1
+  %mul = fmul contract <8 x half> %b, %a
+  %add = fadd contract <8 x half> %mul, %c
+  ret <8 x half> %add
+}
+
+; Negative test: the fmul lacks the 'contract' flag, so no relaxed_madd is expected even with +relaxed-simd.
+define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; RELAXED-LABEL: fadd_fmul_4xf32:
+; RELAXED:         .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f32x4.mul $push0=, $1, $0
+; RELAXED-NEXT:    f32x4.add $push1=, $pop0, $2
+; RELAXED-NEXT:    return $pop1
+;
+; STRICT-LABEL: fadd_fmul_4xf32:
+; STRICT:         .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f32x4.mul $push0=, $1, $0
+; STRICT-NEXT:    f32x4.add $push1=, $pop0, $2
+; STRICT-NEXT:    return $pop1
+  %mul = fmul <4 x float> %b, %a
+  %add = fadd contract <4 x float> %mul, %c
+  ret <4 x float> %add
+}
+
+define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; RELAXED-LABEL: fmuladd_contract_4xf32:
+; RELAXED:         .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $0, $1, $2
+; RELAXED-NEXT:    return $pop0
+;
+; STRICT-LABEL: fmuladd_contract_4xf32:
+; STRICT:         .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f32x4.mul $push0=, $0, $1
+; STRICT-NEXT:    f32x4.add $push1=, $pop0, $2
+; STRICT-NEXT:    return $pop1
+  %fma = call contract <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+  ret <4 x float> %fma
+}
+
+; TODO: llvm.fmuladd may be fused even without the 'contract' flag, so the RELAXED run should also select f32x4.relaxed_madd here.
+define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; RELAXED-LABEL: fmuladd_4xf32:
+; RELAXED:         .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f32x4.mul $push0=, $0, $1
+; RELAXED-NEXT:    f32x4.add $push1=, $pop0, $2
+; RELAXED-NEXT:    return $pop1
+;
+; STRICT-LABEL: fmuladd_4xf32:
+; STRICT:         .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f32x4.mul $push0=, $0, $1
+; STRICT-NEXT:    f32x4.add $push1=, $pop0, $2
+; STRICT-NEXT:    return $pop1
+  %fma = call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+  ret <4 x float> %fma
+}
+
+define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; RELAXED-LABEL: fma_4xf32:
+; RELAXED:         .functype fma_4xf32 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f32x4.extract_lane $push2=, $0, 0
+; RELAXED-NEXT:    f32x4.extract_lane $push1=, $1, 0
+; RELAXED-NEXT:    f32x4.extract_lane $push0=, $2, 0
+; RELAXED-NEXT:    call $push3=, fmaf, $pop2, $pop1, $pop0
+; RELAXED-NEXT:    f32x4.splat $push4=, $pop3
+; RELAXED-NEXT:    f32x4.extract_lane $push7=, $0, 1
+; RELAXED-NEXT:    f32x4.extract_lane $push6=, $1, 1
+; RELAXED-NEXT:    f32x4.extract_lane $push5=, $2, 1
+; RELAXED-NEXT:    call $push8=, fmaf, $pop7, $pop6, $pop5
+; RELAXED-NEXT:    f32x4.replace_lane $push9=, $pop4, 1, $pop8
+; RELAXED-NEXT:    f32x4.extract_lane $push12=, $0, 2
+; RELAXED-NEXT:    f32x4.extract_lane $push11=, $1, 2
+; RELAXED-NEXT:    f32x4.extract_lane $push10=, $2, 2
+; RELAXED-NEXT:    call $push13=, fmaf, $pop12, $pop11, $pop10
+; RELAXED-NEXT:    f32x4.replace_lane $push14=, $pop9, 2, $pop13
+; RELAXED-NEXT:    f32x4.extract_lane $push17=, $0, 3
+; RELAXED-NEXT:    f32x4.extract_lane $push16=, $1, 3
+; RELAXED-NEXT:    f32x4.extract_lane $push15=, $2, 3
+; RELAXED-NEXT:    call $push18=, fmaf, $pop17, $pop16, $pop15
+; RELAXED-NEXT:    f32x4.replace_lane $push19=, $pop14, 3, $pop18
+; RELAXED-NEXT:    return $pop19
+;
+; STRICT-LABEL: fma_4xf32:
+; STRICT:         .functype fma_4xf32 (v128, v128, v128) -> (v128)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f32x4.extract_lane $push2=, $0, 0
+; STRICT-NEXT:    f32x4.extract_lane $push1=, $1, 0
+; STRICT-NEXT:    f32x4.extract_lane $push0=, $2, 0
+; STRICT-NEXT:    call $push3=, fmaf, $pop2, $pop1, $pop0
+; STRICT-NEXT:    f32x4.splat $push4=, $pop3
+; STRICT-NEXT:    f32x4.extract_lane $push7=, $0, 1
+; STRICT-NEXT:    f32x4.extract_lane $push6=, $1, 1
+; STRICT-NEXT:    f32x4.extract_lane $push5=, $2, 1
+; STRICT-NEXT:    call $push8=, fmaf, $pop7, $pop6, $pop5
+; STRICT-NEXT:    f32x4.replace_lane $push9=, $pop4, 1, $pop8
+; STRICT-NEXT:    f32x4.extract_lane $push12=, $0, 2
+; STRICT-NEXT:    f32x4.extract_lane $push11=, $1, 2
+; STRICT-NEXT:    f32x4.extract_lane $push10=, $2, 2
+; STRICT-NEXT:    call $push13=, fmaf, $pop12, $pop11, $pop10
+; STRICT-NEXT:    f32x4.replace_lane $push14=, $pop9, 2, $pop13
+; STRICT-NEXT:    f32x4.extract_lane $push17=, $0, 3
+; STRICT-NEXT:    f32x4.extract_lane $push16=, $1, 3
+; STRICT-NEXT:    f32x4.extract_lane $push15=, $2, 3
+; STRICT-NEXT:    call $push18=, fmaf, $pop17, $pop16, $pop15
+; STRICT-NEXT:    f32x4.replace_lane $push19=, $pop14, 3, $pop18
+; STRICT-NEXT:    return $pop19
+  %fma = call <4 x float> @llvm.fma(<4 x float> %a, <4 x float> %b, <4 x float> %c)
+  ret <4 x float> %fma
+}
+
+; <8 x float> is split into two v128 halves; each half must compute %b * %a + %c, i.e. relaxed_madd(b, a, c).
+define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
+; RELAXED-LABEL: fadd_fmul_contract_8xf32:
+; RELAXED:         .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f32x4.relaxed_madd $push0=, $4, $2, $6
+; RELAXED-NEXT:    v128.store 16($0), $pop0
+; RELAXED-NEXT:    f32x4.relaxed_madd $push1=, $3, $1, $5
+; RELAXED-NEXT:    v128.store 0($0), $pop1
+; RELAXED-NEXT:    return
+;
+; STRICT-LABEL: fadd_fmul_contract_8xf32:
+; STRICT:         .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f32x4.mul $push0=, $4, $2
+; STRICT-NEXT:    f32x4.add $push1=, $pop0, $6
+; STRICT-NEXT:    v128.store 16($0), $pop1
+; STRICT-NEXT:    f32x4.mul $push2=, $3, $1
+; STRICT-NEXT:    f32x4.add $push3=, $pop2, $5
+; STRICT-NEXT:    v128.store 0($0), $pop3
+; STRICT-NEXT:    return
+  %mul = fmul contract <8 x float> %b, %a
+  %add = fadd contract <8 x float> %mul, %c
+  ret <8 x float> %add
+}
+
+; relaxed_madd(x, y, z) computes x * y + z, so the fmul operands (%b, %a) come first and %c last.
+define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fadd_fmul_contract_2xf64:
+; RELAXED:         .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f64x2.relaxed_madd $push0=, $1, $0, $2
+; RELAXED-NEXT:    return $pop0
+;
+; STRICT-LABEL: fadd_fmul_contract_2xf64:
+; STRICT:         .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f64x2.mul $push0=, $1, $0
+; STRICT-NEXT:    f64x2.add $push1=, $pop0, $2
+; STRICT-NEXT:    return $pop1
+  %mul = fmul contract <2 x double> %b, %a
+  %add = fadd contract <2 x double> %mul, %c
+  ret <2 x double> %add
+}
+
+define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fadd_fmul_contract_f32:
+; RELAXED:         .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    f32.mul $push0=, $1, $0
+; RELAXED-NEXT:    f32.add $push1=, $pop0, $2
+; RELAXED-NEXT:    return $pop1
+;
+; STRICT-LABEL: fadd_fmul_contract_f32:
+; STRICT:         .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    f32.mul $push0=, $1, $0
+; STRICT-NEXT:    f32.add $push1=, $pop0, $2
+; STRICT-NEXT:    return $pop1
+  %mul = fmul contract float %b, %a
+  %add = fadd contract float %mul, %c
+  ret float %add
+}
+
+define float @fma_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fma_f32:
+; RELAXED:         .functype fma_f32 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    call $push0=, fmaf, $0, $1, $2
+; RELAXED-NEXT:    return $pop0
+;
+; STRICT-LABEL: fma_f32:
+; STRICT:         .functype fma_f32 (f32, f32, f32) -> (f32)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    call $push0=, fmaf, $0, $1, $2
+; STRICT-NEXT:    return $pop0
+  %fma = call float @llvm.fma(float %a, float %b, float %c)
+  ret float %fma
+}
+
+define double @fma_f64(double %a, double %b, double %c) {
+; RELAXED-LABEL: fma_f64:
+; RELAXED:         .functype fma_f64 (f64, f64, f64) -> (f64)
+; RELAXED-NEXT:  # %bb.0:
+; RELAXED-NEXT:    call $push0=, fma, $0, $1, $2
+; RELAXED-NEXT:    return $pop0
+;
+; STRICT-LABEL: fma_f64:
+; STRICT:         .functype fma_f64 (f64, f64, f64) -> (f64)
+; STRICT-NEXT:  # %bb.0:
+; STRICT-NEXT:    call $push0=, fma, $0, $1, $2
+; STRICT-NEXT:    return $pop0
+  %fma = call double @llvm.fma(double %a, double %b, double %c)
+  ret double %fma
+}