Skip to content

Commit 7c8b20f

Browse files
committed
Avoid choosing a bad ElementCount for splatting the condition
1 parent 1262bd3 commit 7c8b20f

File tree

3 files changed

+36
-140
lines changed

3 files changed

+36
-140
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26965,6 +26965,11 @@ static SDValue performSelectCombine(SDNode *N,
2696526965
if (!ResVT.isVector() || NumMaskElts == 0)
2696626966
return SDValue();
2696726967

26968+
// Avoid creating vectors with excessive VFs for small types.
26969+
if (DCI.isBeforeLegalize() &&
26970+
SrcVT.getSizeInBits() < ResVT.getScalarSizeInBits())
26971+
NumMaskElts = ResVT.getVectorNumElements();
26972+
2696826973
SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
2696926974
EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
2697026975

llvm/test/CodeGen/AArch64/expand-select.ll

Lines changed: 20 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -4,20 +4,15 @@
44
define void @foo(i32 %In1, <2 x i128> %In2, <2 x i128> %In3, ptr %Out) {
55
; CHECK-LABEL: foo:
66
; CHECK: // %bb.0:
7-
; CHECK-NEXT: movi d0, #0000000000000000
8-
; CHECK-NEXT: and w8, w0, #0x1
9-
; CHECK-NEXT: ldr x11, [sp]
10-
; CHECK-NEXT: fmov s1, w8
11-
; CHECK-NEXT: ldp x8, x10, [sp, #8]
12-
; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s
13-
; CHECK-NEXT: fmov w9, s0
14-
; CHECK-NEXT: tst w9, #0x1
15-
; CHECK-NEXT: csel x8, x5, x8, ne
16-
; CHECK-NEXT: csel x9, x4, x11, ne
17-
; CHECK-NEXT: stp x9, x8, [x10, #16]
18-
; CHECK-NEXT: csel x8, x3, x7, ne
19-
; CHECK-NEXT: csel x9, x2, x6, ne
20-
; CHECK-NEXT: stp x9, x8, [x10]
7+
; CHECK-NEXT: ldp x8, x9, [sp, #8]
8+
; CHECK-NEXT: tst w0, #0x1
9+
; CHECK-NEXT: ldr x10, [sp]
10+
; CHECK-NEXT: csel x8, x5, x8, eq
11+
; CHECK-NEXT: csel x10, x4, x10, eq
12+
; CHECK-NEXT: stp x10, x8, [x9, #16]
13+
; CHECK-NEXT: csel x8, x3, x7, eq
14+
; CHECK-NEXT: csel x10, x2, x6, eq
15+
; CHECK-NEXT: stp x10, x8, [x9]
2116
; CHECK-NEXT: ret
2217
%cond = and i32 %In1, 1
2318
%cbool = icmp eq i32 %cond, 0
@@ -31,22 +26,17 @@ define void @foo(i32 %In1, <2 x i128> %In2, <2 x i128> %In3, ptr %Out) {
3126
define void @bar(i32 %In1, <2 x i96> %In2, <2 x i96> %In3, ptr %Out) {
3227
; CHECK-LABEL: bar:
3328
; CHECK: // %bb.0:
34-
; CHECK-NEXT: movi d0, #0000000000000000
35-
; CHECK-NEXT: and w8, w0, #0x1
36-
; CHECK-NEXT: ldr x10, [sp, #16]
37-
; CHECK-NEXT: fmov s1, w8
38-
; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s
39-
; CHECK-NEXT: fmov w9, s0
40-
; CHECK-NEXT: tst w9, #0x1
41-
; CHECK-NEXT: ldp x8, x9, [sp]
42-
; CHECK-NEXT: csel x11, x2, x6, ne
43-
; CHECK-NEXT: str x11, [x10]
44-
; CHECK-NEXT: csel x8, x4, x8, ne
45-
; CHECK-NEXT: stur x8, [x10, #12]
46-
; CHECK-NEXT: csel x8, x5, x9, ne
47-
; CHECK-NEXT: csel x9, x3, x7, ne
48-
; CHECK-NEXT: str w8, [x10, #20]
49-
; CHECK-NEXT: str w9, [x10, #8]
29+
; CHECK-NEXT: ldp x8, x10, [sp]
30+
; CHECK-NEXT: tst w0, #0x1
31+
; CHECK-NEXT: ldr x9, [sp, #16]
32+
; CHECK-NEXT: csel x11, x2, x6, eq
33+
; CHECK-NEXT: csel x8, x4, x8, eq
34+
; CHECK-NEXT: str x11, [x9]
35+
; CHECK-NEXT: stur x8, [x9, #12]
36+
; CHECK-NEXT: csel x8, x5, x10, eq
37+
; CHECK-NEXT: csel x10, x3, x7, eq
38+
; CHECK-NEXT: str w8, [x9, #20]
39+
; CHECK-NEXT: str w10, [x9, #8]
5040
; CHECK-NEXT: ret
5141
%cond = and i32 %In1, 1
5242
%cbool = icmp eq i32 %cond, 0

llvm/test/CodeGen/AArch64/neon-anyof-splat.ll

Lines changed: 11 additions & 110 deletions
Original file line number | Diff line number | Diff line change
@@ -12,62 +12,13 @@ target triple = "aarch64-linux-gnu"
1212
define <4 x i32> @any_of_select_vf4(<4 x i32> %mask, <4 x i32> %a, <4 x i32> %b) {
1313
; CHECK-LABEL: any_of_select_vf4:
1414
; CHECK: // %bb.0:
15-
; CHECK-NEXT: sub sp, sp, #16
16-
; CHECK-NEXT: .cfi_def_cfa_offset 16
17-
; CHECK-NEXT: adrp x8, .LCPI0_0
1815
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
19-
; CHECK-NEXT: movi d3, #0000000000000000
20-
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0]
21-
; CHECK-NEXT: and v0.16b, v0.16b, v4.16b
22-
; CHECK-NEXT: movi v4.16b, #15
23-
; CHECK-NEXT: addv s0, v0.4s
24-
; CHECK-NEXT: and v3.16b, v3.16b, v4.16b
25-
; CHECK-NEXT: and v0.16b, v0.16b, v4.16b
26-
; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
27-
; CHECK-NEXT: dup v0.16b, v3.b[0]
28-
; CHECK-NEXT: umov w8, v3.b[0]
29-
; CHECK-NEXT: umov w9, v0.b[1]
30-
; CHECK-NEXT: umov w10, v0.b[2]
31-
; CHECK-NEXT: umov w11, v0.b[7]
32-
; CHECK-NEXT: and x8, x8, #0xf
33-
; CHECK-NEXT: bfi x8, x9, #4, #4
34-
; CHECK-NEXT: umov w9, v0.b[3]
35-
; CHECK-NEXT: bfi x8, x10, #8, #4
36-
; CHECK-NEXT: umov w10, v0.b[4]
37-
; CHECK-NEXT: bfi x8, x9, #12, #4
38-
; CHECK-NEXT: umov w9, v0.b[5]
39-
; CHECK-NEXT: bfi x8, x10, #16, #4
40-
; CHECK-NEXT: umov w10, v0.b[6]
41-
; CHECK-NEXT: bfi x8, x9, #20, #4
42-
; CHECK-NEXT: umov w9, v0.b[8]
43-
; CHECK-NEXT: bfi x8, x10, #24, #4
44-
; CHECK-NEXT: lsl w10, w11, #28
45-
; CHECK-NEXT: umov w11, v0.b[9]
46-
; CHECK-NEXT: orr x8, x8, x10
47-
; CHECK-NEXT: and w9, w9, #0xf
48-
; CHECK-NEXT: umov w10, v0.b[10]
49-
; CHECK-NEXT: orr x8, x8, x9, lsl #32
50-
; CHECK-NEXT: and w9, w11, #0xf
51-
; CHECK-NEXT: umov w11, v0.b[11]
52-
; CHECK-NEXT: orr x8, x8, x9, lsl #36
53-
; CHECK-NEXT: and w9, w10, #0xf
54-
; CHECK-NEXT: umov w10, v0.b[12]
55-
; CHECK-NEXT: orr x8, x8, x9, lsl #40
56-
; CHECK-NEXT: and w9, w11, #0xf
57-
; CHECK-NEXT: umov w11, v0.b[13]
58-
; CHECK-NEXT: orr x8, x8, x9, lsl #44
59-
; CHECK-NEXT: and w9, w10, #0xf
60-
; CHECK-NEXT: umov w10, v0.b[14]
61-
; CHECK-NEXT: orr x8, x8, x9, lsl #48
62-
; CHECK-NEXT: and w9, w11, #0xf
63-
; CHECK-NEXT: orr x8, x8, x9, lsl #52
64-
; CHECK-NEXT: umov w9, v0.b[15]
65-
; CHECK-NEXT: and w10, w10, #0xf
66-
; CHECK-NEXT: orr x8, x8, x10, lsl #56
67-
; CHECK-NEXT: orr x8, x8, x9, lsl #60
68-
; CHECK-NEXT: dup v0.2d, x8
69-
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
70-
; CHECK-NEXT: add sp, sp, #16
16+
; CHECK-NEXT: umaxv s0, v0.4s
17+
; CHECK-NEXT: fmov w8, s0
18+
; CHECK-NEXT: tst w8, #0x1
19+
; CHECK-NEXT: csetm w8, ne
20+
; CHECK-NEXT: dup v0.4s, w8
21+
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
7122
; CHECK-NEXT: ret
7223
%cmp = icmp slt <4 x i32> %mask, zeroinitializer
7324
%cmp.bc = bitcast <4 x i1> %cmp to i4
@@ -79,63 +30,13 @@ define <4 x i32> @any_of_select_vf4(<4 x i32> %mask, <4 x i32> %a, <4 x i32> %b)
7930
define <2 x i64> @any_of_select_vf2(<2 x i64> %mask, <2 x i64> %a, <2 x i64> %b) {
8031
; CHECK-LABEL: any_of_select_vf2:
8132
; CHECK: // %bb.0:
82-
; CHECK-NEXT: sub sp, sp, #16
83-
; CHECK-NEXT: .cfi_def_cfa_offset 16
84-
; CHECK-NEXT: adrp x8, .LCPI1_0
8533
; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
86-
; CHECK-NEXT: movi d3, #0000000000000000
87-
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI1_0]
88-
; CHECK-NEXT: and v0.16b, v0.16b, v4.16b
89-
; CHECK-NEXT: movi v4.16b, #3
90-
; CHECK-NEXT: addp d0, v0.2d
91-
; CHECK-NEXT: and v3.16b, v3.16b, v4.16b
92-
; CHECK-NEXT: and v0.16b, v0.16b, v4.16b
93-
; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
94-
; CHECK-NEXT: dup v0.16b, v3.b[0]
95-
; CHECK-NEXT: umov w8, v3.b[0]
96-
; CHECK-NEXT: umov w9, v0.b[1]
97-
; CHECK-NEXT: umov w10, v0.b[2]
98-
; CHECK-NEXT: umov w11, v0.b[7]
99-
; CHECK-NEXT: umov w12, v0.b[8]
100-
; CHECK-NEXT: and w8, w8, #0x3
101-
; CHECK-NEXT: umov w13, v0.b[3]
102-
; CHECK-NEXT: umov w14, v0.b[4]
103-
; CHECK-NEXT: umov w15, v0.b[10]
104-
; CHECK-NEXT: umov w16, v0.b[5]
105-
; CHECK-NEXT: bfi w8, w9, #2, #2
106-
; CHECK-NEXT: umov w9, v0.b[9]
107-
; CHECK-NEXT: ubfiz w11, w11, #14, #2
108-
; CHECK-NEXT: ubfiz w12, w12, #16, #2
109-
; CHECK-NEXT: bfi w8, w10, #4, #2
110-
; CHECK-NEXT: umov w10, v0.b[11]
111-
; CHECK-NEXT: ubfiz w15, w15, #20, #2
112-
; CHECK-NEXT: orr w11, w11, w12
113-
; CHECK-NEXT: umov w12, v0.b[13]
114-
; CHECK-NEXT: bfi w8, w13, #6, #2
115-
; CHECK-NEXT: umov w13, v0.b[12]
116-
; CHECK-NEXT: ubfiz w9, w9, #18, #2
117-
; CHECK-NEXT: bfi w8, w14, #8, #2
118-
; CHECK-NEXT: umov w14, v0.b[14]
119-
; CHECK-NEXT: orr w9, w11, w9
120-
; CHECK-NEXT: umov w11, v0.b[6]
121-
; CHECK-NEXT: ubfiz w10, w10, #22, #2
122-
; CHECK-NEXT: orr w9, w9, w15
123-
; CHECK-NEXT: ubfiz w13, w13, #24, #2
124-
; CHECK-NEXT: bfi w8, w16, #10, #2
125-
; CHECK-NEXT: orr w9, w9, w10
126-
; CHECK-NEXT: ubfiz w10, w12, #26, #2
127-
; CHECK-NEXT: orr w9, w9, w13
128-
; CHECK-NEXT: ubfiz w12, w14, #28, #2
129-
; CHECK-NEXT: umov w13, v0.b[15]
130-
; CHECK-NEXT: bfi w8, w11, #12, #2
131-
; CHECK-NEXT: orr w9, w9, w10
132-
; CHECK-NEXT: orr w9, w9, w12
133-
; CHECK-NEXT: orr w8, w8, w9
134-
; CHECK-NEXT: orr w8, w8, w13, lsl #30
135-
; CHECK-NEXT: orr x8, x8, x8, lsl #32
34+
; CHECK-NEXT: umaxv s0, v0.4s
35+
; CHECK-NEXT: fmov w8, s0
36+
; CHECK-NEXT: tst w8, #0x1
37+
; CHECK-NEXT: csetm x8, ne
13638
; CHECK-NEXT: dup v0.2d, x8
137-
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
138-
; CHECK-NEXT: add sp, sp, #16
39+
; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b
13940
; CHECK-NEXT: ret
14041
%cmp = icmp slt <2 x i64> %mask, zeroinitializer
14142
%cmp.bc = bitcast <2 x i1> %cmp to i2

0 commit comments

Comments
 (0)