Skip to content

Commit a364251

Browse files
author
Simon Moll
committed
Merge commit '8c51812913c6aa301919a127d2cdfae7d9d6054a' into merge/ve-vvp-usdiv
2 parents b93ad3e + 8c51812 commit a364251

File tree

3 files changed

+165
-29
lines changed

3 files changed

+165
-29
lines changed

llvm/lib/Target/VE/VVPInstrPatternsVec.td

Lines changed: 27 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,14 @@ multiclass Binary_rv_vv<
229229
defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
230230
}
231231

232+
// Binary operators that support broadcasts on LHS and RHS.
// Instantiates the Binary_rv patterns together with the Binary_vr_vv
// patterns for one (ScalarVT, DataVT, MaskVT) combination.
multiclass Binary_rv_vr_vv<
233+
SDPatternOperator OpNode,
234+
ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
235+
string OpBaseName> {
236+
defm : Binary_rv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
237+
defm : Binary_vr_vv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
238+
}
239+
232240
// Expand both the 64-bit and 32-bit variants (256 elements)
233241
multiclass Binary_rv_vv_ShortLong<
234242
SDPatternOperator OpNode,
@@ -254,23 +262,6 @@ multiclass Binary_vr_vv_ShortLong<
254262
ShortOpBaseName>;
255263
}
256264

257-
// Binary operators that support broadcasts on LHS and RHS.
258-
multiclass Binary_all<
259-
SDPatternOperator OpNode,
260-
ValueType ScalarVT, ValueType DataVT,
261-
ValueType MaskVT, string OpBaseName> {
262-
defm : Binary_rv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
263-
defm : Binary_vr_vv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
264-
}
265-
266-
multiclass Binary_ShortLong<
267-
SDPatternOperator OpNode,
268-
ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
269-
ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
270-
defm : Binary_all<OpNode, LongScalarVT, LongDataVT, v256i1, LongOpBaseName>;
271-
defm : Binary_all<OpNode, ShortScalarVT, ShortDataVT, v256i1, ShortOpBaseName>;
272-
}
273-
274265
multiclass Ternary<
275266
SDPatternOperator OpNode,
276267
ValueType ScalarVT, ValueType DataVT,
@@ -332,6 +323,18 @@ multiclass Ternary_ShortLong<
332323
// Integer arithmetic (256 elements)
333324
defm : Unary_ShortLong<vvp_ctpop, i64, v256i64, "VPCNT", i32, v256i32, "PVPCNTLO">;
334325

326+
// Expand Binary_rv_vr_vv for both the "Long" and the "Short" type set.
// Each set supplies its own scalar type, data vector type and opcode base
// name; both instantiations use v256i1 as the mask type.
multiclass Binary_rv_vr_vv_ShortLong<
327+
SDPatternOperator OpNode,
328+
ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
329+
ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
330+
defm : Binary_rv_vr_vv<OpNode,
331+
LongScalarVT, LongDataVT, v256i1,
332+
LongOpBaseName>;
333+
defm : Binary_rv_vr_vv<OpNode,
334+
ShortScalarVT, ShortDataVT, v256i1,
335+
ShortOpBaseName>;
336+
}
337+
335338
defm : Binary_rv_vv_ShortLong<c_vvp_add,
336339
i64, v256i64, "VADDSL",
337340
i32, v256i32, "VADDSWSX">;
@@ -341,6 +344,12 @@ defm : Binary_rv_vv_ShortLong<vvp_sub,
341344
defm : Binary_rv_vv_ShortLong<c_vvp_mul,
342345
i64, v256i64, "VMULSL",
343346
i32, v256i32, "VMULSWSX">;
347+
defm : Binary_rv_vr_vv_ShortLong<vvp_sdiv,
348+
i64, v256i64, "VDIVSL",
349+
i32, v256i32, "VDIVSWSX">;
350+
defm : Binary_rv_vr_vv_ShortLong<vvp_udiv,
351+
i64, v256i64, "VDIVUL",
352+
i32, v256i32, "VDIVUW">;
344353
defm : Binary_rv_vv_ShortLong<c_vvp_and,
345354
i64, v256i64, "VAND",
346355
i32, v256i32, "PVANDLO">;
@@ -351,9 +360,6 @@ defm : Binary_rv_vv_ShortLong<c_vvp_xor,
351360
i64, v256i64, "VXOR",
352361
i32, v256i32, "PVXORLO">;
353362

354-
defm : Binary_ShortLong<vvp_sdiv, i64, v256i64, "VDIVSL", i32, v256i32, "VDIVSWSX">;
355-
defm : Binary_ShortLong<vvp_udiv, i64, v256i64, "VDIVUL", i32, v256i32, "VDIVUW">;
356-
357363
defm : Binary_vr_vv_ShortLong<vvp_shl,
358364
i64, v256i64, "VSLL",
359365
i32, v256i32, "PVSLLLO">;
@@ -370,7 +376,7 @@ defm : Unary_ShortLong<vvp_fsqrt, f64, v256f64, "VFSQRTD", f32, v256f32, "VFSQRT
370376
defm : Binary_rv_vv_ShortLong<c_vvp_fadd, f64, v256f64, "VFADDD", f32, v256f32, "PVFADDUP">;
371377
defm : Binary_rv_vv_ShortLong<vvp_fsub, f64, v256f64, "VFSUBD", f32, v256f32, "PVFSUBUP">;
372378
defm : Binary_rv_vv_ShortLong<c_vvp_fmul, f64, v256f64, "VFMULD", f32, v256f32, "PVFMULUP">;
373-
defm : Binary_ShortLong<vvp_fdiv, f64, v256f64, "VFDIVD", f32, v256f32, "VFDIVS">;
379+
defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv, f64, v256f64, "VFDIVD", f32, v256f32, "VFDIVS">;
374380

375381
defm : Binary_rv_vv_ShortLong<c_vvp_fminnum, f64, v256f64, "VFMIND", f32, v256f32, "VFMINS">;
376382
defm : Binary_rv_vv_ShortLong<c_vvp_fmaxnum, f64, v256f64, "VFMAXD", f32, v256f32, "VFMAXS">;
Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
33

4-
define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
5-
; CHECK-LABEL: test_vp_int:
4+
declare <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
5+
6+
define fastcc <256 x i32> @test_vp_sdiv_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_sdiv_v256i32_vv:
68
; CHECK: # %bb.0:
79
; CHECK-NEXT: and %s0, %s0, (32)0
810
; CHECK-NEXT: lvl %s0
@@ -12,5 +14,68 @@ define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x
1214
ret <256 x i32> %r0
1315
}
1416

15-
; integer arith
16-
declare <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
17+
; Scalar-LHS (rv) form: %s0 is splatted to every lane (insertelement +
; all-zero shuffle mask) and used as the dividend of llvm.vp.sdiv. The checks
; expect vdivs.w.sx to take the scalar register as its first source operand.
define fastcc <256 x i32> @test_vp_sdiv_v256i32_rv(i32 %s0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_sdiv_v256i32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: lvl %s1
22+
; CHECK-NEXT: vdivs.w.sx %v0, %s0, %v0, %vm1
23+
; CHECK-NEXT: b.l.t (, %s10)
24+
%xins = insertelement <256 x i32> undef, i32 %s0, i32 0
25+
%i0 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
26+
%r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
27+
ret <256 x i32> %r0
28+
}
29+
30+
; Scalar-RHS (vr) form: %s1 is splatted to every lane (insertelement +
; all-zero shuffle mask) and used as the divisor of llvm.vp.sdiv. The checks
; expect vdivs.w.sx to take the scalar register as its second source operand.
define fastcc <256 x i32> @test_vp_sdiv_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %n) {
31+
; CHECK-LABEL: test_vp_sdiv_v256i32_vr:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: and %s1, %s1, (32)0
34+
; CHECK-NEXT: lvl %s1
35+
; CHECK-NEXT: vdivs.w.sx %v0, %v0, %s0, %vm1
36+
; CHECK-NEXT: b.l.t (, %s10)
37+
%yins = insertelement <256 x i32> undef, i32 %s1, i32 0
38+
%i1 = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
39+
%r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
40+
ret <256 x i32> %r0
41+
}
42+
43+
44+
declare <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
45+
46+
; Vector-vector (vv) form: both operands of llvm.vp.sdiv are full vectors.
; The checks expect vdivs.l with two vector register sources under mask %vm1.
; Named test_vp_sdiv_* to match the other functions in this file.
define fastcc <256 x i64> @test_vp_sdiv_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
47+
; CHECK-LABEL: test_vp_sdiv_v256i64_vv:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: and %s0, %s0, (32)0
50+
; CHECK-NEXT: lvl %s0
51+
; CHECK-NEXT: vdivs.l %v0, %v0, %v1, %vm1
52+
; CHECK-NEXT: b.l.t (, %s10)
53+
%r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
54+
ret <256 x i64> %r0
55+
}
56+
57+
; Scalar-LHS (rv) form: %s0 is splatted to every lane (insertelement +
; all-zero shuffle mask) and used as the dividend of llvm.vp.sdiv. The checks
; expect vdivs.l to take the scalar register as its first source operand.
define fastcc <256 x i64> @test_vp_sdiv_v256i64_rv(i64 %s0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
58+
; CHECK-LABEL: test_vp_sdiv_v256i64_rv:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: and %s1, %s1, (32)0
61+
; CHECK-NEXT: lvl %s1
62+
; CHECK-NEXT: vdivs.l %v0, %s0, %v0, %vm1
63+
; CHECK-NEXT: b.l.t (, %s10)
64+
%xins = insertelement <256 x i64> undef, i64 %s0, i32 0
65+
%i0 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
66+
%r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
67+
ret <256 x i64> %r0
68+
}
69+
70+
; Scalar-RHS (vr) form: %s1 is splatted to every lane (insertelement +
; all-zero shuffle mask) and used as the divisor of llvm.vp.sdiv. The checks
; expect vdivs.l to take the scalar register as its second source operand.
define fastcc <256 x i64> @test_vp_sdiv_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %n) {
71+
; CHECK-LABEL: test_vp_sdiv_v256i64_vr:
72+
; CHECK: # %bb.0:
73+
; CHECK-NEXT: and %s1, %s1, (32)0
74+
; CHECK-NEXT: lvl %s1
75+
; CHECK-NEXT: vdivs.l %v0, %v0, %s0, %vm1
76+
; CHECK-NEXT: b.l.t (, %s10)
77+
%yins = insertelement <256 x i64> undef, i64 %s1, i32 0
78+
%i1 = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
79+
%r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
80+
ret <256 x i64> %r0
81+
}
Lines changed: 69 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
33

4-
define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
5-
; CHECK-LABEL: test_vp_int:
4+
declare <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
5+
6+
define fastcc <256 x i32> @test_vp_udiv_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_udiv_v256i32_vv:
68
; CHECK: # %bb.0:
79
; CHECK-NEXT: and %s0, %s0, (32)0
810
; CHECK-NEXT: lvl %s0
@@ -12,5 +14,68 @@ define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x
1214
ret <256 x i32> %r0
1315
}
1416

15-
; integer arith
16-
declare <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
17+
; Scalar-LHS (rv) form: %s0 is splatted to every lane (insertelement +
; all-zero shuffle mask) and used as the dividend of llvm.vp.udiv. The checks
; expect vdivu.w to take the scalar register as its first source operand.
define fastcc <256 x i32> @test_vp_udiv_v256i32_rv(i32 %s0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_udiv_v256i32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: lvl %s1
22+
; CHECK-NEXT: vdivu.w %v0, %s0, %v0, %vm1
23+
; CHECK-NEXT: b.l.t (, %s10)
24+
%xins = insertelement <256 x i32> undef, i32 %s0, i32 0
25+
%i0 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
26+
%r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
27+
ret <256 x i32> %r0
28+
}
29+
30+
; Scalar-RHS (vr) form: %s1 is splatted to every lane (insertelement +
; all-zero shuffle mask) and used as the divisor of llvm.vp.udiv. The checks
; expect vdivu.w to take the scalar register as its second source operand.
define fastcc <256 x i32> @test_vp_udiv_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %n) {
31+
; CHECK-LABEL: test_vp_udiv_v256i32_vr:
32+
; CHECK: # %bb.0:
33+
; CHECK-NEXT: and %s1, %s1, (32)0
34+
; CHECK-NEXT: lvl %s1
35+
; CHECK-NEXT: vdivu.w %v0, %v0, %s0, %vm1
36+
; CHECK-NEXT: b.l.t (, %s10)
37+
%yins = insertelement <256 x i32> undef, i32 %s1, i32 0
38+
%i1 = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
39+
%r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
40+
ret <256 x i32> %r0
41+
}
42+
43+
44+
declare <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
45+
46+
; Vector-vector (vv) form: both operands of llvm.vp.udiv are full vectors.
; The checks expect vdivu.l with two vector register sources under mask %vm1.
; Named test_vp_udiv_* to match the other functions in this file.
define fastcc <256 x i64> @test_vp_udiv_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
47+
; CHECK-LABEL: test_vp_udiv_v256i64_vv:
48+
; CHECK: # %bb.0:
49+
; CHECK-NEXT: and %s0, %s0, (32)0
50+
; CHECK-NEXT: lvl %s0
51+
; CHECK-NEXT: vdivu.l %v0, %v0, %v1, %vm1
52+
; CHECK-NEXT: b.l.t (, %s10)
53+
%r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
54+
ret <256 x i64> %r0
55+
}
56+
57+
; Scalar-LHS (rv) form: %s0 is splatted to every lane (insertelement +
; all-zero shuffle mask) and used as the dividend of llvm.vp.udiv. The checks
; expect vdivu.l to take the scalar register as its first source operand.
define fastcc <256 x i64> @test_vp_udiv_v256i64_rv(i64 %s0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
58+
; CHECK-LABEL: test_vp_udiv_v256i64_rv:
59+
; CHECK: # %bb.0:
60+
; CHECK-NEXT: and %s1, %s1, (32)0
61+
; CHECK-NEXT: lvl %s1
62+
; CHECK-NEXT: vdivu.l %v0, %s0, %v0, %vm1
63+
; CHECK-NEXT: b.l.t (, %s10)
64+
%xins = insertelement <256 x i64> undef, i64 %s0, i32 0
65+
%i0 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
66+
%r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
67+
ret <256 x i64> %r0
68+
}
69+
70+
; Scalar-RHS (vr) form: %s1 is splatted to every lane (insertelement +
; all-zero shuffle mask) and used as the divisor of llvm.vp.udiv. The checks
; expect vdivu.l to take the scalar register as its second source operand.
define fastcc <256 x i64> @test_vp_udiv_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %n) {
71+
; CHECK-LABEL: test_vp_udiv_v256i64_vr:
72+
; CHECK: # %bb.0:
73+
; CHECK-NEXT: and %s1, %s1, (32)0
74+
; CHECK-NEXT: lvl %s1
75+
; CHECK-NEXT: vdivu.l %v0, %v0, %s0, %vm1
76+
; CHECK-NEXT: b.l.t (, %s10)
77+
%yins = insertelement <256 x i64> undef, i64 %s1, i32 0
78+
%i1 = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
79+
%r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
80+
ret <256 x i64> %r0
81+
}

0 commit comments

Comments
 (0)