Skip to content

Commit 6640b0a

Browse files
authored
[WebAssembly] Add patterns for relaxed madd (#147487)
[WebAssembly] Fold fadd contract (fmul contract) to relaxed madd with -mattr=+simd128,+relaxed-simd. Fixes #121311. Changes: (1) precommit test for #121311; (2) fold fadd contract (fmul contract) to relaxed madd with -mattr=+simd128,+relaxed-simd; (3) move the fadd_contract PatFrag from ARM.td and WebAssembly.td into TargetSelectionDAG.td so the pattern can be reused.
1 parent f4630ba commit 6640b0a

File tree

4 files changed

+284
-5
lines changed

4 files changed

+284
-5
lines changed

llvm/include/llvm/Target/TargetSelectionDAG.td

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,6 +1136,18 @@ def immAllOnesV : SDPatternOperator; // ISD::isConstantSplatVectorAllOnes
11361136
def immAllZerosV : SDPatternOperator; // ISD::isConstantSplatVectorAllZeros
11371137

11381138
// Other helper fragments.
1139+
1140+
// An 'fmul' node that has the 'contract' fast-math flag set.
1141+
def fmul_contract : PatFrag<(ops node:$a, node:$b), (fmul node:$a, node:$b),[{
1142+
return N->getFlags().hasAllowContract();
1143+
}]>;
1144+
1145+
// An 'fadd' node that has the 'contract' fast-math flag set, so it can be fused with an fmul_contract operand into an fma or another relaxed multiply-add instruction.
1146+
def fadd_contract : PatFrag<(ops node:$a, node:$b), (fadd node:$a, node:$b),[{
1147+
return N->getFlags().hasAllowContract();
1148+
}]>;
1149+
1150+
11391151
def not : PatFrag<(ops node:$in), (xor node:$in, -1)>;
11401152
def vnot : PatFrag<(ops node:$in), (xor node:$in, immAllOnesV)>;
11411153
def ineg : PatFrag<(ops node:$in), (sub 0, node:$in)>;

llvm/lib/Target/ARM/ARMInstrInfo.td

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -486,11 +486,6 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
486486
return hasNoVMLxHazardUse(N);
487487
}]>;
488488

489-
// An 'fadd' node which can be contracted into a fma
490-
def fadd_contract : PatFrag<(ops node:$lhs, node:$rhs),(fadd node:$lhs, node:$rhs),[{
491-
return N->getFlags().hasAllowContract();
492-
}]>;
493-
494489
def imm_even : ImmLeaf<i32, [{ return (Imm & 1) == 0; }]>;
495490
def imm_odd : ImmLeaf<i32, [{ return (Imm & 1) == 1; }]>;
496491

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1536,6 +1536,10 @@ multiclass SIMDMADD<Vec vec, bits<32> simdopA, bits<32> simdopS, list<Predicate>
15361536
(vec.vt V128:$a), (vec.vt V128:$b), (vec.vt V128:$c)))],
15371537
vec.prefix#".relaxed_nmadd\t$dst, $a, $b, $c",
15381538
vec.prefix#".relaxed_nmadd", simdopS, reqs>;
1539+
1540+
def : Pat<(fadd_contract (vec.vt V128:$a), (fmul_contract (vec.vt V128:$b), (vec.vt V128:$c))),
1541+
(!cast<Instruction>("MADD_"#vec) V128:$a, V128:$b, V128:$c)>, Requires<[HasRelaxedSIMD]>;
1542+
15391543
}
15401544

15411545
defm "" : SIMDMADD<F32x4, 0x105, 0x106, [HasRelaxedSIMD]>;
Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
3+
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED
4+
; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128 | FileCheck %s --check-prefix=STRICT
5+
6+
target triple = "wasm32"
7+
8+
define double @fadd_fmul_contract_f64(double %a, double %b, double %c) {
9+
; RELAXED-LABEL: fadd_fmul_contract_f64:
10+
; RELAXED: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
11+
; RELAXED-NEXT: # %bb.0:
12+
; RELAXED-NEXT: f64.mul $push0=, $1, $0
13+
; RELAXED-NEXT: f64.add $push1=, $pop0, $2
14+
; RELAXED-NEXT: return $pop1
15+
;
16+
; STRICT-LABEL: fadd_fmul_contract_f64:
17+
; STRICT: .functype fadd_fmul_contract_f64 (f64, f64, f64) -> (f64)
18+
; STRICT-NEXT: # %bb.0:
19+
; STRICT-NEXT: f64.mul $push0=, $1, $0
20+
; STRICT-NEXT: f64.add $push1=, $pop0, $2
21+
; STRICT-NEXT: return $pop1
22+
%mul = fmul contract double %b, %a
23+
%add = fadd contract double %mul, %c
24+
ret double %add
25+
}
26+
27+
define <4 x float> @fadd_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
28+
; RELAXED-LABEL: fadd_fmul_contract_4xf32:
29+
; RELAXED: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
30+
; RELAXED-NEXT: # %bb.0:
31+
; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $1, $0
32+
; RELAXED-NEXT: return $pop0
33+
;
34+
; STRICT-LABEL: fadd_fmul_contract_4xf32:
35+
; STRICT: .functype fadd_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
36+
; STRICT-NEXT: # %bb.0:
37+
; STRICT-NEXT: f32x4.mul $push0=, $1, $0
38+
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
39+
; STRICT-NEXT: return $pop1
40+
%mul = fmul contract <4 x float> %b, %a
41+
%add = fadd contract <4 x float> %mul, %c
42+
ret <4 x float> %add
43+
}
44+
45+
46+
define <8 x half> @fadd_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
47+
; RELAXED-LABEL: fadd_fmul_contract_8xf16:
48+
; RELAXED: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
49+
; RELAXED-NEXT: # %bb.0:
50+
; RELAXED-NEXT: f16x8.relaxed_madd $push0=, $2, $1, $0
51+
; RELAXED-NEXT: return $pop0
52+
;
53+
; STRICT-LABEL: fadd_fmul_contract_8xf16:
54+
; STRICT: .functype fadd_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
55+
; STRICT-NEXT: # %bb.0:
56+
; STRICT-NEXT: f16x8.mul $push0=, $1, $0
57+
; STRICT-NEXT: f16x8.add $push1=, $pop0, $2
58+
; STRICT-NEXT: return $pop1
59+
%mul = fmul contract <8 x half> %b, %a
60+
%add = fadd contract <8 x half> %mul, %c
61+
ret <8 x half> %add
62+
}
63+
64+
65+
define <4 x float> @fadd_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
66+
; RELAXED-LABEL: fadd_fmul_4xf32:
67+
; RELAXED: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
68+
; RELAXED-NEXT: # %bb.0:
69+
; RELAXED-NEXT: f32x4.mul $push0=, $1, $0
70+
; RELAXED-NEXT: f32x4.add $push1=, $pop0, $2
71+
; RELAXED-NEXT: return $pop1
72+
;
73+
; STRICT-LABEL: fadd_fmul_4xf32:
74+
; STRICT: .functype fadd_fmul_4xf32 (v128, v128, v128) -> (v128)
75+
; STRICT-NEXT: # %bb.0:
76+
; STRICT-NEXT: f32x4.mul $push0=, $1, $0
77+
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
78+
; STRICT-NEXT: return $pop1
79+
%mul = fmul <4 x float> %b, %a
80+
%add = fadd contract <4 x float> %mul, %c
81+
ret <4 x float> %add
82+
}
83+
84+
define <4 x float> @fmuladd_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
85+
; RELAXED-LABEL: fmuladd_contract_4xf32:
86+
; RELAXED: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
87+
; RELAXED-NEXT: # %bb.0:
88+
; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $2, $0, $1
89+
; RELAXED-NEXT: return $pop0
90+
;
91+
; STRICT-LABEL: fmuladd_contract_4xf32:
92+
; STRICT: .functype fmuladd_contract_4xf32 (v128, v128, v128) -> (v128)
93+
; STRICT-NEXT: # %bb.0:
94+
; STRICT-NEXT: f32x4.mul $push0=, $0, $1
95+
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
96+
; STRICT-NEXT: return $pop1
97+
%fma = call contract <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
98+
ret <4 x float> %fma
99+
}
100+
101+
; TODO: This should also be lowered to f32x4.relaxed_madd in the RELAXED case.
102+
define <4 x float> @fmuladd_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
103+
; RELAXED-LABEL: fmuladd_4xf32:
104+
; RELAXED: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
105+
; RELAXED-NEXT: # %bb.0:
106+
; RELAXED-NEXT: f32x4.mul $push0=, $0, $1
107+
; RELAXED-NEXT: f32x4.add $push1=, $pop0, $2
108+
; RELAXED-NEXT: return $pop1
109+
;
110+
; STRICT-LABEL: fmuladd_4xf32:
111+
; STRICT: .functype fmuladd_4xf32 (v128, v128, v128) -> (v128)
112+
; STRICT-NEXT: # %bb.0:
113+
; STRICT-NEXT: f32x4.mul $push0=, $0, $1
114+
; STRICT-NEXT: f32x4.add $push1=, $pop0, $2
115+
; STRICT-NEXT: return $pop1
116+
%fma = call <4 x float> @llvm.fmuladd(<4 x float> %a, <4 x float> %b, <4 x float> %c)
117+
ret <4 x float> %fma
118+
}
119+
120+
define <4 x float> @fma_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
121+
; RELAXED-LABEL: fma_4xf32:
122+
; RELAXED: .functype fma_4xf32 (v128, v128, v128) -> (v128)
123+
; RELAXED-NEXT: # %bb.0:
124+
; RELAXED-NEXT: f32x4.extract_lane $push2=, $0, 0
125+
; RELAXED-NEXT: f32x4.extract_lane $push1=, $1, 0
126+
; RELAXED-NEXT: f32x4.extract_lane $push0=, $2, 0
127+
; RELAXED-NEXT: call $push3=, fmaf, $pop2, $pop1, $pop0
128+
; RELAXED-NEXT: f32x4.splat $push4=, $pop3
129+
; RELAXED-NEXT: f32x4.extract_lane $push7=, $0, 1
130+
; RELAXED-NEXT: f32x4.extract_lane $push6=, $1, 1
131+
; RELAXED-NEXT: f32x4.extract_lane $push5=, $2, 1
132+
; RELAXED-NEXT: call $push8=, fmaf, $pop7, $pop6, $pop5
133+
; RELAXED-NEXT: f32x4.replace_lane $push9=, $pop4, 1, $pop8
134+
; RELAXED-NEXT: f32x4.extract_lane $push12=, $0, 2
135+
; RELAXED-NEXT: f32x4.extract_lane $push11=, $1, 2
136+
; RELAXED-NEXT: f32x4.extract_lane $push10=, $2, 2
137+
; RELAXED-NEXT: call $push13=, fmaf, $pop12, $pop11, $pop10
138+
; RELAXED-NEXT: f32x4.replace_lane $push14=, $pop9, 2, $pop13
139+
; RELAXED-NEXT: f32x4.extract_lane $push17=, $0, 3
140+
; RELAXED-NEXT: f32x4.extract_lane $push16=, $1, 3
141+
; RELAXED-NEXT: f32x4.extract_lane $push15=, $2, 3
142+
; RELAXED-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
143+
; RELAXED-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
144+
; RELAXED-NEXT: return $pop19
145+
;
146+
; STRICT-LABEL: fma_4xf32:
147+
; STRICT: .functype fma_4xf32 (v128, v128, v128) -> (v128)
148+
; STRICT-NEXT: # %bb.0:
149+
; STRICT-NEXT: f32x4.extract_lane $push2=, $0, 0
150+
; STRICT-NEXT: f32x4.extract_lane $push1=, $1, 0
151+
; STRICT-NEXT: f32x4.extract_lane $push0=, $2, 0
152+
; STRICT-NEXT: call $push3=, fmaf, $pop2, $pop1, $pop0
153+
; STRICT-NEXT: f32x4.splat $push4=, $pop3
154+
; STRICT-NEXT: f32x4.extract_lane $push7=, $0, 1
155+
; STRICT-NEXT: f32x4.extract_lane $push6=, $1, 1
156+
; STRICT-NEXT: f32x4.extract_lane $push5=, $2, 1
157+
; STRICT-NEXT: call $push8=, fmaf, $pop7, $pop6, $pop5
158+
; STRICT-NEXT: f32x4.replace_lane $push9=, $pop4, 1, $pop8
159+
; STRICT-NEXT: f32x4.extract_lane $push12=, $0, 2
160+
; STRICT-NEXT: f32x4.extract_lane $push11=, $1, 2
161+
; STRICT-NEXT: f32x4.extract_lane $push10=, $2, 2
162+
; STRICT-NEXT: call $push13=, fmaf, $pop12, $pop11, $pop10
163+
; STRICT-NEXT: f32x4.replace_lane $push14=, $pop9, 2, $pop13
164+
; STRICT-NEXT: f32x4.extract_lane $push17=, $0, 3
165+
; STRICT-NEXT: f32x4.extract_lane $push16=, $1, 3
166+
; STRICT-NEXT: f32x4.extract_lane $push15=, $2, 3
167+
; STRICT-NEXT: call $push18=, fmaf, $pop17, $pop16, $pop15
168+
; STRICT-NEXT: f32x4.replace_lane $push19=, $pop14, 3, $pop18
169+
; STRICT-NEXT: return $pop19
170+
%fma = call <4 x float> @llvm.fma(<4 x float> %a, <4 x float> %b, <4 x float> %c)
171+
ret <4 x float> %fma
172+
}
173+
174+
175+
define <8 x float> @fadd_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
176+
; RELAXED-LABEL: fadd_fmul_contract_8xf32:
177+
; RELAXED: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
178+
; RELAXED-NEXT: # %bb.0:
179+
; RELAXED-NEXT: f32x4.relaxed_madd $push0=, $6, $4, $2
180+
; RELAXED-NEXT: v128.store 16($0), $pop0
181+
; RELAXED-NEXT: f32x4.relaxed_madd $push1=, $5, $3, $1
182+
; RELAXED-NEXT: v128.store 0($0), $pop1
183+
; RELAXED-NEXT: return
184+
;
185+
; STRICT-LABEL: fadd_fmul_contract_8xf32:
186+
; STRICT: .functype fadd_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
187+
; STRICT-NEXT: # %bb.0:
188+
; STRICT-NEXT: f32x4.mul $push0=, $4, $2
189+
; STRICT-NEXT: f32x4.add $push1=, $pop0, $6
190+
; STRICT-NEXT: v128.store 16($0), $pop1
191+
; STRICT-NEXT: f32x4.mul $push2=, $3, $1
192+
; STRICT-NEXT: f32x4.add $push3=, $pop2, $5
193+
; STRICT-NEXT: v128.store 0($0), $pop3
194+
; STRICT-NEXT: return
195+
%mul = fmul contract <8 x float> %b, %a
196+
%add = fadd contract <8 x float> %mul, %c
197+
ret <8 x float> %add
198+
}
199+
200+
201+
define <2 x double> @fadd_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
202+
; RELAXED-LABEL: fadd_fmul_contract_2xf64:
203+
; RELAXED: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
204+
; RELAXED-NEXT: # %bb.0:
205+
; RELAXED-NEXT: f64x2.relaxed_madd $push0=, $2, $1, $0
206+
; RELAXED-NEXT: return $pop0
207+
;
208+
; STRICT-LABEL: fadd_fmul_contract_2xf64:
209+
; STRICT: .functype fadd_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
210+
; STRICT-NEXT: # %bb.0:
211+
; STRICT-NEXT: f64x2.mul $push0=, $1, $0
212+
; STRICT-NEXT: f64x2.add $push1=, $pop0, $2
213+
; STRICT-NEXT: return $pop1
214+
%mul = fmul contract <2 x double> %b, %a
215+
%add = fadd contract <2 x double> %mul, %c
216+
ret <2 x double> %add
217+
}
218+
219+
define float @fadd_fmul_contract_f32(float %a, float %b, float %c) {
220+
; RELAXED-LABEL: fadd_fmul_contract_f32:
221+
; RELAXED: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
222+
; RELAXED-NEXT: # %bb.0:
223+
; RELAXED-NEXT: f32.mul $push0=, $1, $0
224+
; RELAXED-NEXT: f32.add $push1=, $pop0, $2
225+
; RELAXED-NEXT: return $pop1
226+
;
227+
; STRICT-LABEL: fadd_fmul_contract_f32:
228+
; STRICT: .functype fadd_fmul_contract_f32 (f32, f32, f32) -> (f32)
229+
; STRICT-NEXT: # %bb.0:
230+
; STRICT-NEXT: f32.mul $push0=, $1, $0
231+
; STRICT-NEXT: f32.add $push1=, $pop0, $2
232+
; STRICT-NEXT: return $pop1
233+
%mul = fmul contract float %b, %a
234+
%add = fadd contract float %mul, %c
235+
ret float %add
236+
}
237+
238+
define float @fma_f32(float %a, float %b, float %c) {
239+
; RELAXED-LABEL: fma_f32:
240+
; RELAXED: .functype fma_f32 (f32, f32, f32) -> (f32)
241+
; RELAXED-NEXT: # %bb.0:
242+
; RELAXED-NEXT: call $push0=, fmaf, $0, $1, $2
243+
; RELAXED-NEXT: return $pop0
244+
;
245+
; STRICT-LABEL: fma_f32:
246+
; STRICT: .functype fma_f32 (f32, f32, f32) -> (f32)
247+
; STRICT-NEXT: # %bb.0:
248+
; STRICT-NEXT: call $push0=, fmaf, $0, $1, $2
249+
; STRICT-NEXT: return $pop0
250+
%fma = call float @llvm.fma(float %a, float %b, float %c)
251+
ret float %fma
252+
}
253+
254+
define double @fma_f64(double %a, double %b, double %c) {
255+
; RELAXED-LABEL: fma_f64:
256+
; RELAXED: .functype fma_f64 (f64, f64, f64) -> (f64)
257+
; RELAXED-NEXT: # %bb.0:
258+
; RELAXED-NEXT: call $push0=, fma, $0, $1, $2
259+
; RELAXED-NEXT: return $pop0
260+
;
261+
; STRICT-LABEL: fma_f64:
262+
; STRICT: .functype fma_f64 (f64, f64, f64) -> (f64)
263+
; STRICT-NEXT: # %bb.0:
264+
; STRICT-NEXT: call $push0=, fma, $0, $1, $2
265+
; STRICT-NEXT: return $pop0
266+
%fma = call double @llvm.fma(double %a, double %b, double %c)
267+
ret double %fma
268+
}

0 commit comments

Comments
 (0)