Skip to content

Commit 3f781f1

Browse files
committed
Initial clastb test
1 parent 85c17e4 commit 3f781f1

File tree

1 file changed

+169
-0
lines changed

1 file changed

+169
-0
lines changed
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
2+
; RUN: llc -mtriple=aarch64-linux-unknown -mattr=+sve -o - < %s | FileCheck %s
3+
4+
; Conditional last-active-element extraction for i8 lanes.
; Returns the element of %data at the highest lane whose bit in %pg is set, or
; %existing when no bit of %pg is set. The expected assembly below records the
; current expanded lowering (rev/brkb/cntp/whilels/lastb followed by csel).
define i8 @clastb_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pg, i8 %existing) {
; CHECK-LABEL: clastb_i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.b
; CHECK-NEXT: rdvl x9, #1
; CHECK-NEXT: rev p2.b, p0.b
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
; CHECK-NEXT: cntp x8, p1, p1.b
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: add w8, w8, w9
; CHECK-NEXT: whilels p1.b, xzr, x8
; CHECK-NEXT: ptest p0, p0.b
; CHECK-NEXT: lastb w8, p1, z0.b
; CHECK-NEXT: csel w0, w8, w0, ne
; CHECK-NEXT: ret
  ; Reversing the predicate turns "inactive lanes above the last active one"
  ; into trailing zero elements, which cttz.elts then counts.
  %pg.rev = call <vscale x 16 x i1> @llvm.vector.reverse.nxv16i1(<vscale x 16 x i1> %pg)
  %inactive.tail = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1(<vscale x 16 x i1> %pg.rev, i1 false)
  ; True when at least one lane of %pg is active.
  %has.active = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> %pg)
  ; Runtime lane count of <vscale x 16 x i8> is vscale * 16.
  %vs = call i32 @llvm.vscale.i32()
  %nlanes = shl i32 %vs, 4
  ; last.idx = nlanes - inactive.tail - 1
  %past.last = sub i32 %nlanes, %inactive.tail
  %last.idx = sub i32 %past.last, 1
  %last.elt = extractelement <vscale x 16 x i8> %data, i32 %last.idx
  ; Keep the fallback value when the predicate has no active lane.
  %picked = select i1 %has.active, i8 %last.elt, i8 %existing
  ret i8 %picked
}
30+
31+
; Conditional last-active-element extraction for i16 lanes.
; Returns the element of %data at the highest lane whose bit in %pg is set, or
; %existing when no bit of %pg is set. The expected assembly below records the
; current expanded lowering (rev/brkb/cntp/whilels/lastb followed by csel).
define i16 @clastb_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pg, i16 %existing) {
; CHECK-LABEL: clastb_i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: cnth x9
; CHECK-NEXT: rev p2.h, p0.h
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
; CHECK-NEXT: cntp x8, p1, p1.h
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: add w8, w8, w9
; CHECK-NEXT: whilels p1.h, xzr, x8
; CHECK-NEXT: lastb w8, p1, z0.h
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: csel w0, w8, w0, ne
; CHECK-NEXT: ret
  ; Reversing the predicate turns "inactive lanes above the last active one"
  ; into trailing zero elements, which cttz.elts then counts.
  %pg.rev = call <vscale x 8 x i1> @llvm.vector.reverse.nxv8i1(<vscale x 8 x i1> %pg)
  %inactive.tail = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1(<vscale x 8 x i1> %pg.rev, i1 false)
  ; True when at least one lane of %pg is active.
  %has.active = call i1 @llvm.vector.reduce.or.nxv8i1(<vscale x 8 x i1> %pg)
  ; Runtime lane count of <vscale x 8 x i16> is vscale * 8.
  %vs = call i32 @llvm.vscale.i32()
  %nlanes = shl i32 %vs, 3
  ; last.idx = nlanes - inactive.tail - 1
  %past.last = sub i32 %nlanes, %inactive.tail
  %last.idx = sub i32 %past.last, 1
  %last.elt = extractelement <vscale x 8 x i16> %data, i32 %last.idx
  ; Keep the fallback value when the predicate has no active lane.
  %picked = select i1 %has.active, i16 %last.elt, i16 %existing
  ret i16 %picked
}
58+
59+
; Conditional last-active-element extraction for i32 lanes.
; Returns the element of %data at the highest lane whose bit in %pg is set, or
; %existing when no bit of %pg is set. The expected assembly below records the
; current expanded lowering (rev/brkb/cntp/whilels/lastb followed by csel).
define i32 @clastb_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pg, i32 %existing) {
; CHECK-LABEL: clastb_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cntw x9
; CHECK-NEXT: rev p2.s, p0.s
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
; CHECK-NEXT: cntp x8, p1, p1.s
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: add w8, w8, w9
; CHECK-NEXT: whilels p1.s, xzr, x8
; CHECK-NEXT: lastb w8, p1, z0.s
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: csel w0, w8, w0, ne
; CHECK-NEXT: ret
  ; Reversing the predicate turns "inactive lanes above the last active one"
  ; into trailing zero elements, which cttz.elts then counts.
  %pg.rev = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %pg)
  %inactive.tail = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %pg.rev, i1 false)
  ; True when at least one lane of %pg is active.
  %has.active = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> %pg)
  ; Runtime lane count of <vscale x 4 x i32> is vscale * 4.
  %vs = call i32 @llvm.vscale.i32()
  %nlanes = shl i32 %vs, 2
  ; last.idx = nlanes - inactive.tail - 1
  %past.last = sub i32 %nlanes, %inactive.tail
  %last.idx = sub i32 %past.last, 1
  %last.elt = extractelement <vscale x 4 x i32> %data, i32 %last.idx
  ; Keep the fallback value when the predicate has no active lane.
  %picked = select i1 %has.active, i32 %last.elt, i32 %existing
  ret i32 %picked
}
86+
87+
; Conditional last-active-element extraction for i64 lanes.
; Returns the element of %data at the highest lane whose bit in %pg is set, or
; %existing when no bit of %pg is set. The expected assembly below records the
; current expanded lowering (rev/brkb/cntp/whilels/lastb followed by csel).
define i64 @clastb_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pg, i64 %existing) {
; CHECK-LABEL: clastb_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: cntd x9
; CHECK-NEXT: rev p2.d, p0.d
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
; CHECK-NEXT: cntp x8, p1, p1.d
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: add w8, w8, w9
; CHECK-NEXT: whilels p1.d, xzr, x8
; CHECK-NEXT: lastb x8, p1, z0.d
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: csel x0, x8, x0, ne
; CHECK-NEXT: ret
  ; Reversing the predicate turns "inactive lanes above the last active one"
  ; into trailing zero elements, which cttz.elts then counts.
  %pg.rev = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %pg)
  %inactive.tail = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %pg.rev, i1 false)
  ; True when at least one lane of %pg is active.
  %has.active = call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %pg)
  ; Runtime lane count of <vscale x 2 x i64> is vscale * 2.
  %vs = call i32 @llvm.vscale.i32()
  %nlanes = shl i32 %vs, 1
  ; last.idx = nlanes - inactive.tail - 1
  %past.last = sub i32 %nlanes, %inactive.tail
  %last.idx = sub i32 %past.last, 1
  %last.elt = extractelement <vscale x 2 x i64> %data, i32 %last.idx
  ; Keep the fallback value when the predicate has no active lane.
  %picked = select i1 %has.active, i64 %last.elt, i64 %existing
  ret i64 %picked
}
114+
115+
; Conditional last-active-element extraction for float lanes.
; Returns the element of %data at the highest lane whose bit in %pg is set, or
; %existing when no bit of %pg is set. The expected assembly below records the
; current expanded lowering (rev/brkb/cntp/whilels/lastb followed by fcsel).
;
; The index arithmetic is integer-only, so the cttz.elts and vscale intrinsics
; are mangled on their i32 result type, exactly as in the integer variants;
; only the data vector, the fallback and the result are float.
define float @clastb_float(<vscale x 4 x float> %data, <vscale x 4 x i1> %pg, float %existing) {
; CHECK-LABEL: clastb_float:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: cntw x9
; CHECK-NEXT: rev p2.s, p0.s
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
; CHECK-NEXT: cntp x8, p1, p1.s
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: add w8, w8, w9
; CHECK-NEXT: whilels p1.s, xzr, x8
; CHECK-NEXT: lastb s0, p1, z0.s
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: fcsel s0, s0, s1, ne
; CHECK-NEXT: ret
  ; Reversing the predicate turns "inactive lanes above the last active one"
  ; into trailing zero elements, which cttz.elts then counts.
  %rev.pg = call <vscale x 4 x i1> @llvm.vector.reverse.nxv4i1(<vscale x 4 x i1> %pg)
  %tz.cnt = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1(<vscale x 4 x i1> %rev.pg, i1 false)
  ; True when at least one lane of %pg is active.
  %any.set = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> %pg)
  ; Runtime lane count of <vscale x 4 x float> is vscale * 4.
  %vscale = call i32 @llvm.vscale.i32()
  %size = shl i32 %vscale, 2
  ; idx = size - tz.cnt - 1
  %sub = sub i32 %size, %tz.cnt
  %idx = sub i32 %sub, 1
  %extr = extractelement <vscale x 4 x float> %data, i32 %idx
  ; Keep the fallback value when the predicate has no active lane.
  %res = select i1 %any.set, float %extr, float %existing
  ret float %res
}
142+
143+
; Conditional last-active-element extraction for double lanes.
; Returns the element of %data at the highest lane whose bit in %pg is set, or
; %existing when no bit of %pg is set. The expected assembly below records the
; current expanded lowering (rev/brkb/cntp/whilels/lastb followed by fcsel).
define double @clastb_double(<vscale x 2 x double> %data, <vscale x 2 x i1> %pg, double %existing) {
; CHECK-LABEL: clastb_double:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: cntd x9
; CHECK-NEXT: rev p2.d, p0.d
; CHECK-NEXT: brkb p1.b, p1/z, p2.b
; CHECK-NEXT: cntp x8, p1, p1.d
; CHECK-NEXT: mvn w8, w8
; CHECK-NEXT: add w8, w8, w9
; CHECK-NEXT: whilels p1.d, xzr, x8
; CHECK-NEXT: lastb d0, p1, z0.d
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptest p1, p0.b
; CHECK-NEXT: fcsel d0, d0, d1, ne
; CHECK-NEXT: ret
  ; Reversing the predicate turns "inactive lanes above the last active one"
  ; into trailing zero elements, which cttz.elts then counts.
  %pg.rev = call <vscale x 2 x i1> @llvm.vector.reverse.nxv2i1(<vscale x 2 x i1> %pg)
  %inactive.tail = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1(<vscale x 2 x i1> %pg.rev, i1 false)
  ; True when at least one lane of %pg is active.
  %has.active = call i1 @llvm.vector.reduce.or.nxv2i1(<vscale x 2 x i1> %pg)
  ; Runtime lane count of <vscale x 2 x double> is vscale * 2.
  %vs = call i32 @llvm.vscale.i32()
  %nlanes = shl i32 %vs, 1
  ; last.idx = nlanes - inactive.tail - 1
  %past.last = sub i32 %nlanes, %inactive.tail
  %last.idx = sub i32 %past.last, 1
  %last.elt = extractelement <vscale x 2 x double> %data, i32 %last.idx
  ; Keep the fallback value when the predicate has no active lane.
  %picked = select i1 %has.active, double %last.elt, double %existing
  ret double %picked
}

0 commit comments

Comments
 (0)