Skip to content

Commit 1262bd3

Browse files
committed
[AArch64] Codegen test for select from canonical fixed-width AnyOf
1 parent cc72171 commit 1262bd3

File tree

1 file changed

+145
-0
lines changed

1 file changed

+145
-0
lines changed
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc %s -o - | FileCheck %s
3+
target triple = "aarch64-linux-gnu"
4+
5+
;; An 'AnyOf' reduction (vector.reduce.or) is instcombined to a bitcast to an
6+
;; integer of a bitwidth equal to the number of lanes being reduced, then
7+
;; compared against zero. To select between vectors for NEON, we then need to
8+
;; broadcast the result, but we must be careful when the bitwidth of the scalar
9+
;; result is smaller than the element size of the vectors being selected. We
10+
;; don't want to end up with scalarization.
11+
12+
define <4 x i32> @any_of_select_vf4(<4 x i32> %mask, <4 x i32> %a, <4 x i32> %b) {
13+
; CHECK-LABEL: any_of_select_vf4:
14+
; CHECK: // %bb.0:
15+
; CHECK-NEXT: sub sp, sp, #16
16+
; CHECK-NEXT: .cfi_def_cfa_offset 16
17+
; CHECK-NEXT: adrp x8, .LCPI0_0
18+
; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
19+
; CHECK-NEXT: movi d3, #0000000000000000
20+
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0]
21+
; CHECK-NEXT: and v0.16b, v0.16b, v4.16b
22+
; CHECK-NEXT: movi v4.16b, #15
23+
; CHECK-NEXT: addv s0, v0.4s
24+
; CHECK-NEXT: and v3.16b, v3.16b, v4.16b
25+
; CHECK-NEXT: and v0.16b, v0.16b, v4.16b
26+
; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
27+
; CHECK-NEXT: dup v0.16b, v3.b[0]
28+
; CHECK-NEXT: umov w8, v3.b[0]
29+
; CHECK-NEXT: umov w9, v0.b[1]
30+
; CHECK-NEXT: umov w10, v0.b[2]
31+
; CHECK-NEXT: umov w11, v0.b[7]
32+
; CHECK-NEXT: and x8, x8, #0xf
33+
; CHECK-NEXT: bfi x8, x9, #4, #4
34+
; CHECK-NEXT: umov w9, v0.b[3]
35+
; CHECK-NEXT: bfi x8, x10, #8, #4
36+
; CHECK-NEXT: umov w10, v0.b[4]
37+
; CHECK-NEXT: bfi x8, x9, #12, #4
38+
; CHECK-NEXT: umov w9, v0.b[5]
39+
; CHECK-NEXT: bfi x8, x10, #16, #4
40+
; CHECK-NEXT: umov w10, v0.b[6]
41+
; CHECK-NEXT: bfi x8, x9, #20, #4
42+
; CHECK-NEXT: umov w9, v0.b[8]
43+
; CHECK-NEXT: bfi x8, x10, #24, #4
44+
; CHECK-NEXT: lsl w10, w11, #28
45+
; CHECK-NEXT: umov w11, v0.b[9]
46+
; CHECK-NEXT: orr x8, x8, x10
47+
; CHECK-NEXT: and w9, w9, #0xf
48+
; CHECK-NEXT: umov w10, v0.b[10]
49+
; CHECK-NEXT: orr x8, x8, x9, lsl #32
50+
; CHECK-NEXT: and w9, w11, #0xf
51+
; CHECK-NEXT: umov w11, v0.b[11]
52+
; CHECK-NEXT: orr x8, x8, x9, lsl #36
53+
; CHECK-NEXT: and w9, w10, #0xf
54+
; CHECK-NEXT: umov w10, v0.b[12]
55+
; CHECK-NEXT: orr x8, x8, x9, lsl #40
56+
; CHECK-NEXT: and w9, w11, #0xf
57+
; CHECK-NEXT: umov w11, v0.b[13]
58+
; CHECK-NEXT: orr x8, x8, x9, lsl #44
59+
; CHECK-NEXT: and w9, w10, #0xf
60+
; CHECK-NEXT: umov w10, v0.b[14]
61+
; CHECK-NEXT: orr x8, x8, x9, lsl #48
62+
; CHECK-NEXT: and w9, w11, #0xf
63+
; CHECK-NEXT: orr x8, x8, x9, lsl #52
64+
; CHECK-NEXT: umov w9, v0.b[15]
65+
; CHECK-NEXT: and w10, w10, #0xf
66+
; CHECK-NEXT: orr x8, x8, x10, lsl #56
67+
; CHECK-NEXT: orr x8, x8, x9, lsl #60
68+
; CHECK-NEXT: dup v0.2d, x8
69+
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
70+
; CHECK-NEXT: add sp, sp, #16
71+
; CHECK-NEXT: ret
72+
;; Four-lane case: choose between the whole vectors %a and %b depending on
;; whether any lane of %mask is negative.
;;   %cmp        - per-lane i1, set where the %mask lane is less than zero.
;;   %cmp.bc     - the four i1 lanes packed into a single i4 by the bitcast.
;;   %cmp.bc.not - true only when that i4 is 0, i.e. no lane was set.
;; The scalar i1 drives a whole-vector select: %a when no lane of %mask is
;; negative, %b otherwise.
;; NOTE(review): the autogenerated assertions above contain a per-byte
;; umov/bfi/orr sequence that rebuilds the select mask in x8, which looks like
;; the scalarization the file header says is unwanted -- presumably this test
;; records baseline codegen ahead of a fix; confirm.
%cmp = icmp slt <4 x i32> %mask, zeroinitializer
73+
%cmp.bc = bitcast <4 x i1> %cmp to i4
74+
%cmp.bc.not = icmp eq i4 %cmp.bc, 0
75+
%res = select i1 %cmp.bc.not, <4 x i32> %a, <4 x i32> %b
76+
ret <4 x i32> %res
77+
}
78+
79+
define <2 x i64> @any_of_select_vf2(<2 x i64> %mask, <2 x i64> %a, <2 x i64> %b) {
80+
; CHECK-LABEL: any_of_select_vf2:
81+
; CHECK: // %bb.0:
82+
; CHECK-NEXT: sub sp, sp, #16
83+
; CHECK-NEXT: .cfi_def_cfa_offset 16
84+
; CHECK-NEXT: adrp x8, .LCPI1_0
85+
; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
86+
; CHECK-NEXT: movi d3, #0000000000000000
87+
; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI1_0]
88+
; CHECK-NEXT: and v0.16b, v0.16b, v4.16b
89+
; CHECK-NEXT: movi v4.16b, #3
90+
; CHECK-NEXT: addp d0, v0.2d
91+
; CHECK-NEXT: and v3.16b, v3.16b, v4.16b
92+
; CHECK-NEXT: and v0.16b, v0.16b, v4.16b
93+
; CHECK-NEXT: cmeq v3.16b, v0.16b, v3.16b
94+
; CHECK-NEXT: dup v0.16b, v3.b[0]
95+
; CHECK-NEXT: umov w8, v3.b[0]
96+
; CHECK-NEXT: umov w9, v0.b[1]
97+
; CHECK-NEXT: umov w10, v0.b[2]
98+
; CHECK-NEXT: umov w11, v0.b[7]
99+
; CHECK-NEXT: umov w12, v0.b[8]
100+
; CHECK-NEXT: and w8, w8, #0x3
101+
; CHECK-NEXT: umov w13, v0.b[3]
102+
; CHECK-NEXT: umov w14, v0.b[4]
103+
; CHECK-NEXT: umov w15, v0.b[10]
104+
; CHECK-NEXT: umov w16, v0.b[5]
105+
; CHECK-NEXT: bfi w8, w9, #2, #2
106+
; CHECK-NEXT: umov w9, v0.b[9]
107+
; CHECK-NEXT: ubfiz w11, w11, #14, #2
108+
; CHECK-NEXT: ubfiz w12, w12, #16, #2
109+
; CHECK-NEXT: bfi w8, w10, #4, #2
110+
; CHECK-NEXT: umov w10, v0.b[11]
111+
; CHECK-NEXT: ubfiz w15, w15, #20, #2
112+
; CHECK-NEXT: orr w11, w11, w12
113+
; CHECK-NEXT: umov w12, v0.b[13]
114+
; CHECK-NEXT: bfi w8, w13, #6, #2
115+
; CHECK-NEXT: umov w13, v0.b[12]
116+
; CHECK-NEXT: ubfiz w9, w9, #18, #2
117+
; CHECK-NEXT: bfi w8, w14, #8, #2
118+
; CHECK-NEXT: umov w14, v0.b[14]
119+
; CHECK-NEXT: orr w9, w11, w9
120+
; CHECK-NEXT: umov w11, v0.b[6]
121+
; CHECK-NEXT: ubfiz w10, w10, #22, #2
122+
; CHECK-NEXT: orr w9, w9, w15
123+
; CHECK-NEXT: ubfiz w13, w13, #24, #2
124+
; CHECK-NEXT: bfi w8, w16, #10, #2
125+
; CHECK-NEXT: orr w9, w9, w10
126+
; CHECK-NEXT: ubfiz w10, w12, #26, #2
127+
; CHECK-NEXT: orr w9, w9, w13
128+
; CHECK-NEXT: ubfiz w12, w14, #28, #2
129+
; CHECK-NEXT: umov w13, v0.b[15]
130+
; CHECK-NEXT: bfi w8, w11, #12, #2
131+
; CHECK-NEXT: orr w9, w9, w10
132+
; CHECK-NEXT: orr w9, w9, w12
133+
; CHECK-NEXT: orr w8, w8, w9
134+
; CHECK-NEXT: orr w8, w8, w13, lsl #30
135+
; CHECK-NEXT: orr x8, x8, x8, lsl #32
136+
; CHECK-NEXT: dup v0.2d, x8
137+
; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
138+
; CHECK-NEXT: add sp, sp, #16
139+
; CHECK-NEXT: ret
140+
;; Two-lane case: choose between the whole vectors %a and %b depending on
;; whether any lane of %mask is negative.
;;   %cmp        - per-lane i1, set where the %mask lane is less than zero.
;;   %cmp.bc     - the two i1 lanes packed into a single i2 by the bitcast.
;;   %cmp.bc.not - true only when that i2 is 0, i.e. no lane was set.
;; The scalar i1 drives a whole-vector select: %a when no lane of %mask is
;; negative, %b otherwise.
;; NOTE(review): the autogenerated assertions above contain a per-byte
;; umov/bfi/ubfiz/orr sequence that rebuilds the select mask in x8, which
;; looks like the scalarization the file header says is unwanted -- presumably
;; this test records baseline codegen ahead of a fix; confirm.
%cmp = icmp slt <2 x i64> %mask, zeroinitializer
141+
%cmp.bc = bitcast <2 x i1> %cmp to i2
142+
%cmp.bc.not = icmp eq i2 %cmp.bc, 0
143+
%res = select i1 %cmp.bc.not, <2 x i64> %a, <2 x i64> %b
144+
ret <2 x i64> %res
145+
}

0 commit comments

Comments
 (0)