Skip to content

Commit 8c51812

Browse files
author
Simon Moll
committed
[VE] U|SDIV v256i32|64 isel and tests
Reviewed By: kaz7

Differential Revision: https://reviews.llvm.org/D115940
1 parent 500c4b6 commit 8c51812

File tree

5 files changed

+185
-20
lines changed

5 files changed

+185
-20
lines changed

llvm/lib/Target/VE/VVPInstrInfo.td

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,9 @@ def vvp_sub : SDNode<"VEISD::VVP_SUB", SDTIntBinOpVVP>;
4545
def vvp_mul : SDNode<"VEISD::VVP_MUL", SDTIntBinOpVVP>;
4646
def c_vvp_mul : vvp_commutative<vvp_mul>;
4747

48+
def vvp_sdiv : SDNode<"VEISD::VVP_SDIV", SDTIntBinOpVVP>;
49+
def vvp_udiv : SDNode<"VEISD::VVP_UDIV", SDTIntBinOpVVP>;
50+
4851
def vvp_and : SDNode<"VEISD::VVP_AND", SDTIntBinOpVVP>;
4952
def c_vvp_and : vvp_commutative<vvp_and>;
5053

llvm/lib/Target/VE/VVPInstrPatternsVec.td

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,14 @@ multiclass Binary_vr_vv<
100100
defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
101101
}
102102

103+
// Instantiate the selection patterns for every operand form of a binary
// operator by combining Binary_rv (scalar LHS, vector RHS) with
// Binary_vr_vv (vector LHS with scalar RHS, and vector-vector).
// Used in this file for vvp_sdiv / vvp_udiv, which are not wrapped in
// vvp_commutative, so the scalar operand has to be matched on either side.
//   OpNode     - the VVP SDNode (or pattern operator) to match.
//   ScalarVT   - type of the scalar operand.
//   DataVT     - vector type of the operands and the result.
//   MaskVT     - type of the mask operand.
//   OpBaseName - base name of the instruction to select.
multiclass Binary_rv_vr_vv<
104+
SDPatternOperator OpNode,
105+
ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
106+
string OpBaseName> {
107+
defm : Binary_rv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
108+
defm : Binary_vr_vv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
109+
}
110+
103111
// Expand both 64bit and 32 bit variant (256 elements)
104112
multiclass Binary_rv_vv_ShortLong<
105113
SDPatternOperator OpNode,
@@ -125,6 +133,18 @@ multiclass Binary_vr_vv_ShortLong<
125133
ShortOpBaseName>;
126134
}
127135

136+
// Expand Binary_rv_vr_vv twice for one operator: once with the "Long"
// scalar/vector types and instruction name, and once with the "Short" ones.
// Both expansions use a v256i1 mask. In this file the Long variant is
// instantiated with i64/v256i64 and the Short variant with i32/v256i32.
multiclass Binary_rv_vr_vv_ShortLong<
137+
SDPatternOperator OpNode,
138+
ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
139+
ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
140+
defm : Binary_rv_vr_vv<OpNode,
141+
LongScalarVT, LongDataVT, v256i1,
142+
LongOpBaseName>;
143+
defm : Binary_rv_vr_vv<OpNode,
144+
ShortScalarVT, ShortDataVT, v256i1,
145+
ShortOpBaseName>;
146+
}
147+
128148
defm : Binary_rv_vv_ShortLong<c_vvp_add,
129149
i64, v256i64, "VADDSL",
130150
i32, v256i32, "VADDSWSX">;
@@ -134,6 +154,12 @@ defm : Binary_rv_vv_ShortLong<vvp_sub,
134154
defm : Binary_rv_vv_ShortLong<c_vvp_mul,
135155
i64, v256i64, "VMULSL",
136156
i32, v256i32, "VMULSWSX">;
157+
defm : Binary_rv_vr_vv_ShortLong<vvp_sdiv,
158+
i64, v256i64, "VDIVSL",
159+
i32, v256i32, "VDIVSWSX">;
160+
defm : Binary_rv_vr_vv_ShortLong<vvp_udiv,
161+
i64, v256i64, "VDIVUL",
162+
i32, v256i32, "VDIVUW">;
137163
defm : Binary_rv_vv_ShortLong<c_vvp_and,
138164
i64, v256i64, "VAND",
139165
i32, v256i32, "PVANDLO">;

llvm/lib/Target/VE/VVPNodes.def

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
ADD_BINARY_VVP_OP_COMPACT(ADD)
4343
ADD_BINARY_VVP_OP_COMPACT(SUB)
4444
ADD_BINARY_VVP_OP_COMPACT(MUL)
45+
ADD_BINARY_VVP_OP_COMPACT(UDIV)
46+
ADD_BINARY_VVP_OP_COMPACT(SDIV)
4547

4648
ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA)
4749
ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL)
Lines changed: 77 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,83 @@
1-
; REQUIRES: asserts
2-
; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
33

4-
; CHECK: t{{[0-9]+}}: v256i32 = vp_sdiv [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
5-
; CHECK: [[A]]: v256i32
6-
; CHECK: [[B]]: v256i32
7-
; CHECK: [[MASK]]: v256i1
8-
; CHECK: [[EVL]]: i32
4+
declare <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
95

10-
define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
6+
define fastcc <256 x i32> @test_vp_sdiv_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_sdiv_v256i32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: vdivs.w.sx %v0, %v0, %v1, %vm1
12+
; CHECK-NEXT: b.l.t (, %s10)
1113
%r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
1214
ret <256 x i32> %r0
1315
}
1416

15-
; integer arith
16-
declare <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
17+
define fastcc <256 x i32> @test_vp_sdiv_v256i32_rv(i32 %s0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_sdiv_v256i32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: and %s0, %s0, (32)0
22+
; CHECK-NEXT: lvl %s1
23+
; CHECK-NEXT: vdivs.w.sx %v0, %s0, %v0, %vm1
24+
; CHECK-NEXT: b.l.t (, %s10)
25+
%xins = insertelement <256 x i32> undef, i32 %s0, i32 0
26+
%i0 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
27+
%r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
28+
ret <256 x i32> %r0
29+
}
30+
31+
define fastcc <256 x i32> @test_vp_sdiv_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %n) {
32+
; CHECK-LABEL: test_vp_sdiv_v256i32_vr:
33+
; CHECK: # %bb.0:
34+
; CHECK-NEXT: and %s1, %s1, (32)0
35+
; CHECK-NEXT: and %s0, %s0, (32)0
36+
; CHECK-NEXT: lvl %s1
37+
; CHECK-NEXT: vdivs.w.sx %v0, %v0, %s0, %vm1
38+
; CHECK-NEXT: b.l.t (, %s10)
39+
%yins = insertelement <256 x i32> undef, i32 %s1, i32 0
40+
%i1 = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
41+
%r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
42+
ret <256 x i32> %r0
43+
}
44+
45+
46+
declare <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
47+
48+
; vp.sdiv on v256i64 with both operands as vectors (vector-vector form).
; Named test_vp_sdiv_<type>_<form> to match the other tests in this file.
define fastcc <256 x i64> @test_vp_sdiv_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
49+
; CHECK-LABEL: test_vp_sdiv_v256i64_vv:
50+
; CHECK: # %bb.0:
51+
; CHECK-NEXT: and %s0, %s0, (32)0
52+
; CHECK-NEXT: lvl %s0
53+
; CHECK-NEXT: vdivs.l %v0, %v0, %v1, %vm1
54+
; CHECK-NEXT: b.l.t (, %s10)
55+
%r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
56+
ret <256 x i64> %r0
57+
}
58+
59+
define fastcc <256 x i64> @test_vp_sdiv_v256i64_rv(i64 %s0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
60+
; CHECK-LABEL: test_vp_sdiv_v256i64_rv:
61+
; CHECK: # %bb.0:
62+
; CHECK-NEXT: and %s1, %s1, (32)0
63+
; CHECK-NEXT: lvl %s1
64+
; CHECK-NEXT: vdivs.l %v0, %s0, %v0, %vm1
65+
; CHECK-NEXT: b.l.t (, %s10)
66+
%xins = insertelement <256 x i64> undef, i64 %s0, i32 0
67+
%i0 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
68+
%r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
69+
ret <256 x i64> %r0
70+
}
71+
72+
define fastcc <256 x i64> @test_vp_sdiv_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %n) {
73+
; CHECK-LABEL: test_vp_sdiv_v256i64_vr:
74+
; CHECK: # %bb.0:
75+
; CHECK-NEXT: and %s1, %s1, (32)0
76+
; CHECK-NEXT: lvl %s1
77+
; CHECK-NEXT: vdivs.l %v0, %v0, %s0, %vm1
78+
; CHECK-NEXT: b.l.t (, %s10)
79+
%yins = insertelement <256 x i64> undef, i64 %s1, i32 0
80+
%i1 = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
81+
%r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
82+
ret <256 x i64> %r0
83+
}
Lines changed: 77 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,83 @@
1-
; REQUIRES: asserts
2-
; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
33

4-
; CHECK: t{{[0-9]+}}: v256i32 = vp_udiv [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
5-
; CHECK: [[A]]: v256i32
6-
; CHECK: [[B]]: v256i32
7-
; CHECK: [[MASK]]: v256i1
8-
; CHECK: [[EVL]]: i32
4+
declare <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
95

10-
define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
6+
define fastcc <256 x i32> @test_vp_udiv_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
7+
; CHECK-LABEL: test_vp_udiv_v256i32_vv:
8+
; CHECK: # %bb.0:
9+
; CHECK-NEXT: and %s0, %s0, (32)0
10+
; CHECK-NEXT: lvl %s0
11+
; CHECK-NEXT: vdivu.w %v0, %v0, %v1, %vm1
12+
; CHECK-NEXT: b.l.t (, %s10)
1113
%r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
1214
ret <256 x i32> %r0
1315
}
1416

15-
; integer arith
16-
declare <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
17+
define fastcc <256 x i32> @test_vp_udiv_v256i32_rv(i32 %s0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
18+
; CHECK-LABEL: test_vp_udiv_v256i32_rv:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: and %s1, %s1, (32)0
21+
; CHECK-NEXT: and %s0, %s0, (32)0
22+
; CHECK-NEXT: lvl %s1
23+
; CHECK-NEXT: vdivu.w %v0, %s0, %v0, %vm1
24+
; CHECK-NEXT: b.l.t (, %s10)
25+
%xins = insertelement <256 x i32> undef, i32 %s0, i32 0
26+
%i0 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
27+
%r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
28+
ret <256 x i32> %r0
29+
}
30+
31+
define fastcc <256 x i32> @test_vp_udiv_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %n) {
32+
; CHECK-LABEL: test_vp_udiv_v256i32_vr:
33+
; CHECK: # %bb.0:
34+
; CHECK-NEXT: and %s1, %s1, (32)0
35+
; CHECK-NEXT: and %s0, %s0, (32)0
36+
; CHECK-NEXT: lvl %s1
37+
; CHECK-NEXT: vdivu.w %v0, %v0, %s0, %vm1
38+
; CHECK-NEXT: b.l.t (, %s10)
39+
%yins = insertelement <256 x i32> undef, i32 %s1, i32 0
40+
%i1 = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
41+
%r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
42+
ret <256 x i32> %r0
43+
}
44+
45+
46+
declare <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
47+
48+
; vp.udiv on v256i64 with both operands as vectors (vector-vector form).
; Named test_vp_udiv_<type>_<form> to match the other tests in this file.
define fastcc <256 x i64> @test_vp_udiv_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
49+
; CHECK-LABEL: test_vp_udiv_v256i64_vv:
50+
; CHECK: # %bb.0:
51+
; CHECK-NEXT: and %s0, %s0, (32)0
52+
; CHECK-NEXT: lvl %s0
53+
; CHECK-NEXT: vdivu.l %v0, %v0, %v1, %vm1
54+
; CHECK-NEXT: b.l.t (, %s10)
55+
%r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
56+
ret <256 x i64> %r0
57+
}
58+
59+
define fastcc <256 x i64> @test_vp_udiv_v256i64_rv(i64 %s0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
60+
; CHECK-LABEL: test_vp_udiv_v256i64_rv:
61+
; CHECK: # %bb.0:
62+
; CHECK-NEXT: and %s1, %s1, (32)0
63+
; CHECK-NEXT: lvl %s1
64+
; CHECK-NEXT: vdivu.l %v0, %s0, %v0, %vm1
65+
; CHECK-NEXT: b.l.t (, %s10)
66+
%xins = insertelement <256 x i64> undef, i64 %s0, i32 0
67+
%i0 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
68+
%r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
69+
ret <256 x i64> %r0
70+
}
71+
72+
define fastcc <256 x i64> @test_vp_udiv_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %n) {
73+
; CHECK-LABEL: test_vp_udiv_v256i64_vr:
74+
; CHECK: # %bb.0:
75+
; CHECK-NEXT: and %s1, %s1, (32)0
76+
; CHECK-NEXT: lvl %s1
77+
; CHECK-NEXT: vdivu.l %v0, %v0, %s0, %vm1
78+
; CHECK-NEXT: b.l.t (, %s10)
79+
%yins = insertelement <256 x i64> undef, i64 %s1, i32 0
80+
%i1 = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
81+
%r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
82+
ret <256 x i64> %r0
83+
}

0 commit comments

Comments
 (0)