Skip to content

Commit bb5969c

Browse files
badumbatish authored and mikolaj-pirog committed
[WebAssembly] [Codegen] Add pattern for relaxed min max from pmin/pmax-based patterns over v4f32 and v2f64 (llvm#164486)
Related to llvm#55932
1 parent 1943bc5 commit bb5969c

File tree

3 files changed

+514
-12
lines changed

3 files changed

+514
-12
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1336,22 +1336,25 @@ def pmax : PatFrags<(ops node:$lhs, node:$rhs), [
13361336
]>;
13371337
defm PMAX : SIMDBinaryFP<pmax, "pmax", 235>;
13381338

1339+
multiclass PMinMaxInt<Vec vec, NI baseMinInst, NI baseMaxInst> {
1340+
def : Pat<(vec.int_vt (vselect
1341+
(setolt (vec.vt (bitconvert V128:$rhs)),
1342+
(vec.vt (bitconvert V128:$lhs))),
1343+
V128:$rhs, V128:$lhs)),
1344+
(baseMinInst $lhs, $rhs)>;
1345+
def : Pat<(vec.int_vt (vselect
1346+
(setolt (vec.vt (bitconvert V128:$lhs)),
1347+
(vec.vt (bitconvert V128:$rhs))),
1348+
V128:$rhs, V128:$lhs)),
1349+
(baseMaxInst $lhs, $rhs)>;
1350+
}
13391351
// Also match the pmin/pmax cases where the operands are int vectors (but the
13401352
// comparison is still a floating point comparison). This can happen when using
13411353
// the wasm_simd128.h intrinsics because v128_t is an integer vector.
13421354
foreach vec = [F32x4, F64x2, F16x8] in {
1343-
defvar pmin = !cast<NI>("PMIN_"#vec);
1344-
defvar pmax = !cast<NI>("PMAX_"#vec);
1345-
def : Pat<(vec.int_vt (vselect
1346-
(setolt (vec.vt (bitconvert V128:$rhs)),
1347-
(vec.vt (bitconvert V128:$lhs))),
1348-
V128:$rhs, V128:$lhs)),
1349-
(pmin $lhs, $rhs)>;
1350-
def : Pat<(vec.int_vt (vselect
1351-
(setolt (vec.vt (bitconvert V128:$lhs)),
1352-
(vec.vt (bitconvert V128:$rhs))),
1353-
V128:$rhs, V128:$lhs)),
1354-
(pmax $lhs, $rhs)>;
1355+
defvar pmin = !cast<NI>("PMIN_"#vec);
1356+
defvar pmax = !cast<NI>("PMAX_"#vec);
1357+
defm : PMinMaxInt<vec, pmin, pmax>;
13551358
}
13561359

13571360
// And match the pmin/pmax LLVM intrinsics as well
@@ -1756,6 +1759,15 @@ let Predicates = [HasRelaxedSIMD] in {
17561759
(relaxed_max V128:$lhs, V128:$rhs)>;
17571760
def : Pat<(vec.vt (fmaximumnum (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
17581761
(relaxed_max V128:$lhs, V128:$rhs)>;
1762+
1763+
// Select relaxed_min/relaxed_max for patterns that would otherwise match pmin/pmax
1764+
let AddedComplexity = 1 in {
1765+
def : Pat<(vec.vt (pmin (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
1766+
(relaxed_min $lhs, $rhs)>;
1767+
def : Pat<(vec.vt (pmax (vec.vt V128:$lhs), (vec.vt V128:$rhs))),
1768+
(relaxed_max $lhs, $rhs)>;
1769+
defm : PMinMaxInt<vec, relaxed_min, relaxed_max>;
1770+
}
17591771
}
17601772
}
17611773

llvm/test/CodeGen/WebAssembly/simd-relaxed-fmax.ll

Lines changed: 244 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,250 @@ define <2 x double> @test_minimumnum_f64x2(<2 x double> %a, <2 x double> %b) {
5454
ret <2 x double> %result
5555
}
5656

57+
define <4 x float> @test_pmax_v4f32_olt(<4 x float> %x, <4 x float> %y) {
58+
; CHECK-LABEL: test_pmax_v4f32_olt:
59+
; CHECK: .functype test_pmax_v4f32_olt (v128, v128) -> (v128)
60+
; CHECK-NEXT: # %bb.0:
61+
; CHECK-NEXT: local.get 0
62+
; CHECK-NEXT: local.get 1
63+
; CHECK-NEXT: f32x4.relaxed_max
64+
; CHECK-NEXT: # fallthrough-return
65+
%c = fcmp olt <4 x float> %x, %y
66+
%a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
67+
ret <4 x float> %a
68+
}
69+
70+
define <4 x float> @test_pmax_v4f32_ole(<4 x float> %x, <4 x float> %y) {
71+
; CHECK-LABEL: test_pmax_v4f32_ole:
72+
; CHECK: .functype test_pmax_v4f32_ole (v128, v128) -> (v128)
73+
; CHECK-NEXT: # %bb.0:
74+
; CHECK-NEXT: local.get 0
75+
; CHECK-NEXT: local.get 1
76+
; CHECK-NEXT: f32x4.relaxed_max
77+
; CHECK-NEXT: # fallthrough-return
78+
%c = fcmp ole <4 x float> %x, %y
79+
%a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
80+
ret <4 x float> %a
81+
}
82+
83+
define <4 x float> @test_pmax_v4f32_ogt(<4 x float> %x, <4 x float> %y) {
84+
; CHECK-LABEL: test_pmax_v4f32_ogt:
85+
; CHECK: .functype test_pmax_v4f32_ogt (v128, v128) -> (v128)
86+
; CHECK-NEXT: # %bb.0:
87+
; CHECK-NEXT: local.get 0
88+
; CHECK-NEXT: local.get 1
89+
; CHECK-NEXT: f32x4.relaxed_max
90+
; CHECK-NEXT: # fallthrough-return
91+
%c = fcmp ogt <4 x float> %y, %x
92+
%a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
93+
ret <4 x float> %a
94+
}
95+
96+
define <4 x float> @test_pmax_v4f32_oge(<4 x float> %x, <4 x float> %y) {
97+
; CHECK-LABEL: test_pmax_v4f32_oge:
98+
; CHECK: .functype test_pmax_v4f32_oge (v128, v128) -> (v128)
99+
; CHECK-NEXT: # %bb.0:
100+
; CHECK-NEXT: local.get 0
101+
; CHECK-NEXT: local.get 1
102+
; CHECK-NEXT: f32x4.relaxed_max
103+
; CHECK-NEXT: # fallthrough-return
104+
%c = fcmp oge <4 x float> %y, %x
105+
%a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
106+
ret <4 x float> %a
107+
}
108+
109+
; For setlt
110+
define <4 x float> @pmax_v4f32_fast_olt(<4 x float> %x, <4 x float> %y) {
111+
; CHECK-LABEL: pmax_v4f32_fast_olt:
112+
; CHECK: .functype pmax_v4f32_fast_olt (v128, v128) -> (v128)
113+
; CHECK-NEXT: # %bb.0:
114+
; CHECK-NEXT: local.get 0
115+
; CHECK-NEXT: local.get 1
116+
; CHECK-NEXT: f32x4.relaxed_max
117+
; CHECK-NEXT: # fallthrough-return
118+
%c = fcmp fast olt <4 x float> %x, %y
119+
%a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
120+
ret <4 x float> %a
121+
}
122+
123+
; For setle
124+
define <4 x float> @test_pmax_v4f32_fast_ole(<4 x float> %x, <4 x float> %y) {
125+
; CHECK-LABEL: test_pmax_v4f32_fast_ole:
126+
; CHECK: .functype test_pmax_v4f32_fast_ole (v128, v128) -> (v128)
127+
; CHECK-NEXT: # %bb.0:
128+
; CHECK-NEXT: local.get 0
129+
; CHECK-NEXT: local.get 1
130+
; CHECK-NEXT: f32x4.relaxed_max
131+
; CHECK-NEXT: # fallthrough-return
132+
%c = fcmp fast ole <4 x float> %x, %y
133+
%a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x
134+
ret <4 x float> %a
135+
}
136+
137+
; For setgt
138+
define <4 x float> @test_pmax_v4f32_fast_ogt(<4 x float> %x, <4 x float> %y) {
139+
; CHECK-LABEL: test_pmax_v4f32_fast_ogt:
140+
; CHECK: .functype test_pmax_v4f32_fast_ogt (v128, v128) -> (v128)
141+
; CHECK-NEXT: # %bb.0:
142+
; CHECK-NEXT: local.get 0
143+
; CHECK-NEXT: local.get 1
144+
; CHECK-NEXT: f32x4.relaxed_max
145+
; CHECK-NEXT: # fallthrough-return
146+
%c = fcmp fast ogt <4 x float> %x, %y
147+
%a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
148+
ret <4 x float> %a
149+
}
150+
151+
; For setge
152+
define <4 x float> @test_pmax_v4f32_fast_oge(<4 x float> %x, <4 x float> %y) {
153+
; CHECK-LABEL: test_pmax_v4f32_fast_oge:
154+
; CHECK: .functype test_pmax_v4f32_fast_oge (v128, v128) -> (v128)
155+
; CHECK-NEXT: # %bb.0:
156+
; CHECK-NEXT: local.get 0
157+
; CHECK-NEXT: local.get 1
158+
; CHECK-NEXT: f32x4.relaxed_max
159+
; CHECK-NEXT: # fallthrough-return
160+
%c = fcmp fast oge <4 x float> %x, %y
161+
%a = select <4 x i1> %c, <4 x float> %x, <4 x float> %y
162+
ret <4 x float> %a
163+
}
164+
165+
define <4 x i32> @test_pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) {
166+
; CHECK-LABEL: test_pmax_int_v4f32:
167+
; CHECK: .functype test_pmax_int_v4f32 (v128, v128) -> (v128)
168+
; CHECK-NEXT: # %bb.0:
169+
; CHECK-NEXT: local.get 1
170+
; CHECK-NEXT: local.get 0
171+
; CHECK-NEXT: f32x4.relaxed_max
172+
; CHECK-NEXT: # fallthrough-return
173+
%fx = bitcast <4 x i32> %x to <4 x float>
174+
%fy = bitcast <4 x i32> %y to <4 x float>
175+
%c = fcmp olt <4 x float> %fy, %fx
176+
%a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y
177+
ret <4 x i32> %a
178+
}
179+
180+
define <2 x double> @test_pmax_v2f64_olt(<2 x double> %x, <2 x double> %y) {
181+
; CHECK-LABEL: test_pmax_v2f64_olt:
182+
; CHECK: .functype test_pmax_v2f64_olt (v128, v128) -> (v128)
183+
; CHECK-NEXT: # %bb.0:
184+
; CHECK-NEXT: local.get 0
185+
; CHECK-NEXT: local.get 1
186+
; CHECK-NEXT: f64x2.relaxed_max
187+
; CHECK-NEXT: # fallthrough-return
188+
%c = fcmp olt <2 x double> %x, %y
189+
%a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
190+
ret <2 x double> %a
191+
}
192+
193+
define <2 x double> @test_pmax_v2f64_ole(<2 x double> %x, <2 x double> %y) {
194+
; CHECK-LABEL: test_pmax_v2f64_ole:
195+
; CHECK: .functype test_pmax_v2f64_ole (v128, v128) -> (v128)
196+
; CHECK-NEXT: # %bb.0:
197+
; CHECK-NEXT: local.get 0
198+
; CHECK-NEXT: local.get 1
199+
; CHECK-NEXT: f64x2.relaxed_max
200+
; CHECK-NEXT: # fallthrough-return
201+
%c = fcmp ole <2 x double> %x, %y
202+
%a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
203+
ret <2 x double> %a
204+
}
205+
206+
define <2 x double> @test_pmax_v2f64_ogt(<2 x double> %x, <2 x double> %y) {
207+
; CHECK-LABEL: test_pmax_v2f64_ogt:
208+
; CHECK: .functype test_pmax_v2f64_ogt (v128, v128) -> (v128)
209+
; CHECK-NEXT: # %bb.0:
210+
; CHECK-NEXT: local.get 1
211+
; CHECK-NEXT: local.get 0
212+
; CHECK-NEXT: f64x2.relaxed_max
213+
; CHECK-NEXT: # fallthrough-return
214+
%c = fcmp ogt <2 x double> %x, %y
215+
%a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
216+
ret <2 x double> %a
217+
}
218+
define <2 x double> @test_pmax_v2f64_oge(<2 x double> %x, <2 x double> %y) {
219+
; CHECK-LABEL: test_pmax_v2f64_oge:
220+
; CHECK: .functype test_pmax_v2f64_oge (v128, v128) -> (v128)
221+
; CHECK-NEXT: # %bb.0:
222+
; CHECK-NEXT: local.get 1
223+
; CHECK-NEXT: local.get 0
224+
; CHECK-NEXT: f64x2.relaxed_max
225+
; CHECK-NEXT: # fallthrough-return
226+
%c = fcmp oge <2 x double> %x, %y
227+
%a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
228+
ret <2 x double> %a
229+
}
230+
231+
; For setlt
232+
define <2 x double> @pmax_v2f64_fast_olt(<2 x double> %x, <2 x double> %y) {
233+
; CHECK-LABEL: pmax_v2f64_fast_olt:
234+
; CHECK: .functype pmax_v2f64_fast_olt (v128, v128) -> (v128)
235+
; CHECK-NEXT: # %bb.0:
236+
; CHECK-NEXT: local.get 0
237+
; CHECK-NEXT: local.get 1
238+
; CHECK-NEXT: f64x2.relaxed_max
239+
; CHECK-NEXT: # fallthrough-return
240+
%c = fcmp fast olt <2 x double> %x, %y
241+
%a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
242+
ret <2 x double> %a
243+
}
244+
245+
; For setle
246+
define <2 x double> @test_pmax_v2f64_fast_ole(<2 x double> %x, <2 x double> %y) {
247+
; CHECK-LABEL: test_pmax_v2f64_fast_ole:
248+
; CHECK: .functype test_pmax_v2f64_fast_ole (v128, v128) -> (v128)
249+
; CHECK-NEXT: # %bb.0:
250+
; CHECK-NEXT: local.get 0
251+
; CHECK-NEXT: local.get 1
252+
; CHECK-NEXT: f64x2.relaxed_max
253+
; CHECK-NEXT: # fallthrough-return
254+
%c = fcmp fast ole <2 x double> %x, %y
255+
%a = select <2 x i1> %c, <2 x double> %y, <2 x double> %x
256+
ret <2 x double> %a
257+
}
258+
; For setgt
259+
define <2 x double> @test_pmax_v2f64_fast_ogt(<2 x double> %x, <2 x double> %y) {
260+
; CHECK-LABEL: test_pmax_v2f64_fast_ogt:
261+
; CHECK: .functype test_pmax_v2f64_fast_ogt (v128, v128) -> (v128)
262+
; CHECK-NEXT: # %bb.0:
263+
; CHECK-NEXT: local.get 0
264+
; CHECK-NEXT: local.get 1
265+
; CHECK-NEXT: f64x2.relaxed_max
266+
; CHECK-NEXT: # fallthrough-return
267+
%c = fcmp fast ogt <2 x double> %x, %y
268+
%a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
269+
ret <2 x double> %a
270+
}
271+
272+
; For setge
273+
define <2 x double> @test_pmax_v2f64_fast_oge(<2 x double> %x, <2 x double> %y) {
274+
; CHECK-LABEL: test_pmax_v2f64_fast_oge:
275+
; CHECK: .functype test_pmax_v2f64_fast_oge (v128, v128) -> (v128)
276+
; CHECK-NEXT: # %bb.0:
277+
; CHECK-NEXT: local.get 0
278+
; CHECK-NEXT: local.get 1
279+
; CHECK-NEXT: f64x2.relaxed_max
280+
; CHECK-NEXT: # fallthrough-return
281+
%c = fcmp fast oge <2 x double> %x, %y
282+
%a = select <2 x i1> %c, <2 x double> %x, <2 x double> %y
283+
ret <2 x double> %a
284+
}
285+
286+
define <2 x i64> @test_pmax_int_v2f64(<2 x i64> %x, <2 x i64> %y) {
287+
; CHECK-LABEL: test_pmax_int_v2f64:
288+
; CHECK: .functype test_pmax_int_v2f64 (v128, v128) -> (v128)
289+
; CHECK-NEXT: # %bb.0:
290+
; CHECK-NEXT: local.get 1
291+
; CHECK-NEXT: local.get 0
292+
; CHECK-NEXT: f64x2.relaxed_max
293+
; CHECK-NEXT: # fallthrough-return
294+
%fx = bitcast <2 x i64> %x to <2 x double>
295+
%fy = bitcast <2 x i64> %y to <2 x double>
296+
%c = fcmp olt <2 x double> %fy, %fx
297+
%a = select <2 x i1> %c, <2 x i64> %x, <2 x i64> %y
298+
ret <2 x i64> %a
299+
}
300+
57301
declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>)
58302
declare <4 x float> @llvm.maximumnum.v4f32(<4 x float>, <4 x float>)
59303
declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>)

0 commit comments

Comments
 (0)