11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
22; RUN: llc -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE
3- ; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1
3+ ; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SVE2p1
4+ ; RUN: llc -mattr=+sve -mattr=+sme2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-SVE2p1-SME2 -check-prefix CHECK-SME2
45target triple = "aarch64-linux"
56
67; Test combining of getActiveLaneMask with a pair of extract_vector operations.
@@ -13,12 +14,12 @@ define void @test_2x8bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #0
1314; CHECK-SVE-NEXT: punpkhi p1.h, p1.b
1415; CHECK-SVE-NEXT: b use
1516;
16- ; CHECK-SVE2p1-LABEL: test_2x8bit_mask_with_32bit_index_and_trip_count:
17- ; CHECK-SVE2p1: // %bb.0:
18- ; CHECK-SVE2p1-NEXT: mov w8, w1
19- ; CHECK-SVE2p1-NEXT: mov w9, w0
20- ; CHECK-SVE2p1-NEXT: whilelo { p0.h, p1.h }, x9, x8
21- ; CHECK-SVE2p1-NEXT: b use
17+ ; CHECK-SVE2p1-SME2-LABEL: test_2x8bit_mask_with_32bit_index_and_trip_count:
18+ ; CHECK-SVE2p1-SME2: // %bb.0:
19+ ; CHECK-SVE2p1-SME2-NEXT: mov w8, w1
20+ ; CHECK-SVE2p1-SME2-NEXT: mov w9, w0
21+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.h, p1.h }, x9, x8
22+ ; CHECK-SVE2p1-SME2-NEXT: b use
2223 %r = call <vscale x 16 x i1 > @llvm.get.active.lane.mask.nxv16i1.i32 (i32 %i , i32 %n )
2324 %v0 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 0 )
2425 %v1 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 8 )
@@ -34,10 +35,10 @@ define void @test_2x8bit_mask_with_64bit_index_and_trip_count(i64 %i, i64 %n) #0
3435; CHECK-SVE-NEXT: punpkhi p1.h, p1.b
3536; CHECK-SVE-NEXT: b use
3637;
37- ; CHECK-SVE2p1-LABEL: test_2x8bit_mask_with_64bit_index_and_trip_count:
38- ; CHECK-SVE2p1: // %bb.0:
39- ; CHECK-SVE2p1-NEXT: whilelo { p0.h, p1.h }, x0, x1
40- ; CHECK-SVE2p1-NEXT: b use
38+ ; CHECK-SVE2p1-SME2-LABEL: test_2x8bit_mask_with_64bit_index_and_trip_count:
39+ ; CHECK-SVE2p1-SME2: // %bb.0:
40+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.h, p1.h }, x0, x1
41+ ; CHECK-SVE2p1-SME2-NEXT: b use
4142 %r = call <vscale x 16 x i1 > @llvm.get.active.lane.mask.nxv16i1.i64 (i64 %i , i64 %n )
4243 %v0 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 0 )
4344 %v1 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 8 )
@@ -53,12 +54,12 @@ define void @test_edge_case_2x1bit_mask(i64 %i, i64 %n) #0 {
5354; CHECK-SVE-NEXT: punpkhi p1.h, p1.b
5455; CHECK-SVE-NEXT: b use
5556;
56- ; CHECK-SVE2p1-LABEL: test_edge_case_2x1bit_mask:
57- ; CHECK-SVE2p1: // %bb.0:
58- ; CHECK-SVE2p1-NEXT: whilelo p1.d, x0, x1
59- ; CHECK-SVE2p1-NEXT: punpklo p0.h, p1.b
60- ; CHECK-SVE2p1-NEXT: punpkhi p1.h, p1.b
61- ; CHECK-SVE2p1-NEXT: b use
57+ ; CHECK-SVE2p1-SME2-LABEL: test_edge_case_2x1bit_mask:
58+ ; CHECK-SVE2p1-SME2: // %bb.0:
59+ ; CHECK-SVE2p1-SME2-NEXT: whilelo p1.d, x0, x1
60+ ; CHECK-SVE2p1-SME2-NEXT: punpklo p0.h, p1.b
61+ ; CHECK-SVE2p1-SME2-NEXT: punpkhi p1.h, p1.b
62+ ; CHECK-SVE2p1-SME2-NEXT: b use
6263 %r = call <vscale x 2 x i1 > @llvm.get.active.lane.mask.nxv2i1.i64 (i64 %i , i64 %n )
6364 %v0 = call <vscale x 1 x i1 > @llvm.vector.extract.nxv1i1.nxv2i1.i64 (<vscale x 2 x i1 > %r , i64 0 )
6465 %v1 = call <vscale x 1 x i1 > @llvm.vector.extract.nxv1i1.nxv2i1.i64 (<vscale x 2 x i1 > %r , i64 1 )
@@ -74,10 +75,10 @@ define void @test_boring_case_2x2bit_mask(i64 %i, i64 %n) #0 {
7475; CHECK-SVE-NEXT: punpkhi p1.h, p1.b
7576; CHECK-SVE-NEXT: b use
7677;
77- ; CHECK-SVE2p1-LABEL: test_boring_case_2x2bit_mask:
78- ; CHECK-SVE2p1: // %bb.0:
79- ; CHECK-SVE2p1-NEXT: whilelo { p0.d, p1.d }, x0, x1
80- ; CHECK-SVE2p1-NEXT: b use
78+ ; CHECK-SVE2p1-SME2-LABEL: test_boring_case_2x2bit_mask:
79+ ; CHECK-SVE2p1-SME2: // %bb.0:
80+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.d, p1.d }, x0, x1
81+ ; CHECK-SVE2p1-SME2-NEXT: b use
8182 %r = call <vscale x 4 x i1 > @llvm.get.active.lane.mask.nxv4i1.i64 (i64 %i , i64 %n )
8283 %v0 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv4i1.i64 (<vscale x 4 x i1 > %r , i64 0 )
8384 %v1 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv4i1.i64 (<vscale x 4 x i1 > %r , i64 2 )
@@ -96,22 +97,22 @@ define void @test_partial_extract(i64 %i, i64 %n) #0 {
9697; CHECK-SVE-NEXT: punpklo p1.h, p2.b
9798; CHECK-SVE-NEXT: b use
9899;
99- ; CHECK-SVE2p1-LABEL: test_partial_extract:
100- ; CHECK-SVE2p1: // %bb.0:
101- ; CHECK-SVE2p1-NEXT: whilelo p0.h, x0, x1
102- ; CHECK-SVE2p1-NEXT: punpklo p1.h, p0.b
103- ; CHECK-SVE2p1-NEXT: punpkhi p2.h, p0.b
104- ; CHECK-SVE2p1-NEXT: punpklo p0.h, p1.b
105- ; CHECK-SVE2p1-NEXT: punpklo p1.h, p2.b
106- ; CHECK-SVE2p1-NEXT: b use
100+ ; CHECK-SVE2p1-SME2-LABEL: test_partial_extract:
101+ ; CHECK-SVE2p1-SME2: // %bb.0:
102+ ; CHECK-SVE2p1-SME2-NEXT: whilelo p0.h, x0, x1
103+ ; CHECK-SVE2p1-SME2-NEXT: punpklo p1.h, p0.b
104+ ; CHECK-SVE2p1-SME2-NEXT: punpkhi p2.h, p0.b
105+ ; CHECK-SVE2p1-SME2-NEXT: punpklo p0.h, p1.b
106+ ; CHECK-SVE2p1-SME2-NEXT: punpklo p1.h, p2.b
107+ ; CHECK-SVE2p1-SME2-NEXT: b use
107108 %r = call <vscale x 8 x i1 > @llvm.get.active.lane.mask.nxv8i1.i64 (i64 %i , i64 %n )
108109 %v0 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 0 )
109110 %v1 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 4 )
110111 tail call void @use (<vscale x 2 x i1 > %v0 , <vscale x 2 x i1 > %v1 )
111112 ret void
112113}
113114
114- ;; Negative test for when extracting a fixed-length vector.
115+ ; Negative test for when extracting a fixed-length vector.
115116define void @test_fixed_extract (i64 %i , i64 %n ) #0 {
116117; CHECK-SVE-LABEL: test_fixed_extract:
117118; CHECK-SVE: // %bb.0:
@@ -144,13 +145,89 @@ define void @test_fixed_extract(i64 %i, i64 %n) #0 {
144145; CHECK-SVE2p1-NEXT: mov v1.s[1], w11
145146; CHECK-SVE2p1-NEXT: // kill: def $d1 killed $d1 killed $q1
146147; CHECK-SVE2p1-NEXT: b use
148+ ;
149+ ; CHECK-SME2-LABEL: test_fixed_extract:
150+ ; CHECK-SME2: // %bb.0:
151+ ; CHECK-SME2-NEXT: whilelo p0.h, x0, x1
152+ ; CHECK-SME2-NEXT: cset w8, mi
153+ ; CHECK-SME2-NEXT: mov z0.h, p0/z, #1 // =0x1
154+ ; CHECK-SME2-NEXT: mov z1.h, z0.h[1]
155+ ; CHECK-SME2-NEXT: mov z2.h, z0.h[5]
156+ ; CHECK-SME2-NEXT: mov z3.h, z0.h[4]
157+ ; CHECK-SME2-NEXT: fmov s0, w8
158+ ; CHECK-SME2-NEXT: zip1 z0.s, z0.s, z1.s
159+ ; CHECK-SME2-NEXT: zip1 z1.s, z3.s, z2.s
160+ ; CHECK-SME2-NEXT: // kill: def $d0 killed $d0 killed $z0
161+ ; CHECK-SME2-NEXT: // kill: def $d1 killed $d1 killed $z1
162+ ; CHECK-SME2-NEXT: b use
147163 %r = call <vscale x 8 x i1 > @llvm.get.active.lane.mask.nxv8i1.i64 (i64 %i , i64 %n )
148164 %v0 = call <2 x i1 > @llvm.vector.extract.v2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 0 )
149165 %v1 = call <2 x i1 > @llvm.vector.extract.v2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 4 )
150166 tail call void @use (<2 x i1 > %v0 , <2 x i1 > %v1 )
151167 ret void
152168}
153169
170+ ; Illegal Types
171+
172+ define void @test_2x16bit_mask_with_32bit_index_and_trip_count (i32 %i , i32 %n ) #0 {
173+ ; CHECK-SVE-LABEL: test_2x16bit_mask_with_32bit_index_and_trip_count:
174+ ; CHECK-SVE: // %bb.0:
175+ ; CHECK-SVE-NEXT: rdvl x8, #1
176+ ; CHECK-SVE-NEXT: adds w8, w0, w8
177+ ; CHECK-SVE-NEXT: csinv w8, w8, wzr, lo
178+ ; CHECK-SVE-NEXT: whilelo p0.b, w0, w1
179+ ; CHECK-SVE-NEXT: whilelo p1.b, w8, w1
180+ ; CHECK-SVE-NEXT: b use
181+ ;
182+ ; CHECK-SVE2p1-SME2-LABEL: test_2x16bit_mask_with_32bit_index_and_trip_count:
183+ ; CHECK-SVE2p1-SME2: // %bb.0:
184+ ; CHECK-SVE2p1-SME2-NEXT: mov w8, w1
185+ ; CHECK-SVE2p1-SME2-NEXT: mov w9, w0
186+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.b, p1.b }, x9, x8
187+ ; CHECK-SVE2p1-SME2-NEXT: b use
188+ %r = call <vscale x 32 x i1 > @llvm.get.active.lane.mask.nxv32i1.i32 (i32 %i , i32 %n )
189+ %v0 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv32i1.i64 (<vscale x 32 x i1 > %r , i64 0 )
190+ %v1 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv32i1.i64 (<vscale x 32 x i1 > %r , i64 16 )
191+ tail call void @use (<vscale x 16 x i1 > %v0 , <vscale x 16 x i1 > %v1 )
192+ ret void
193+ }
194+
; Requests an nxv64i1 active lane mask from a 32-bit index and trip count, then
; extracts four nxv16i1 quarters (element offsets 0, 16, 32 and 48) and passes
; all four to @use.
; The CHECK-SVE lines expect four separate `whilelo pN.b` instructions; the
; CHECK-SVE2p1-SME2 lines expect two paired `whilelo { pA.b, pB.b }` instructions.
; NOTE(review): the name says "2x32bit" but the body performs four nxv16i1
; extracts -- confirm the naming is intentional.
195+ define void @test_2x32bit_mask_with_32bit_index_and_trip_count (i32 %i , i32 %n ) #0 {
196+ ; CHECK-SVE-LABEL: test_2x32bit_mask_with_32bit_index_and_trip_count:
197+ ; CHECK-SVE: // %bb.0:
198+ ; CHECK-SVE-NEXT: rdvl x8, #2
199+ ; CHECK-SVE-NEXT: rdvl x9, #1
200+ ; CHECK-SVE-NEXT: adds w8, w0, w8
201+ ; CHECK-SVE-NEXT: csinv w8, w8, wzr, lo
202+ ; CHECK-SVE-NEXT: adds w10, w8, w9
203+ ; CHECK-SVE-NEXT: csinv w10, w10, wzr, lo
204+ ; CHECK-SVE-NEXT: whilelo p3.b, w10, w1
205+ ; CHECK-SVE-NEXT: adds w9, w0, w9
206+ ; CHECK-SVE-NEXT: csinv w9, w9, wzr, lo
207+ ; CHECK-SVE-NEXT: whilelo p0.b, w0, w1
208+ ; CHECK-SVE-NEXT: whilelo p1.b, w9, w1
209+ ; CHECK-SVE-NEXT: whilelo p2.b, w8, w1
210+ ; CHECK-SVE-NEXT: b use
211+ ;
212+ ; CHECK-SVE2p1-SME2-LABEL: test_2x32bit_mask_with_32bit_index_and_trip_count:
213+ ; CHECK-SVE2p1-SME2: // %bb.0:
214+ ; CHECK-SVE2p1-SME2-NEXT: rdvl x8, #2
215+ ; CHECK-SVE2p1-SME2-NEXT: mov w9, w1
216+ ; CHECK-SVE2p1-SME2-NEXT: mov w10, w0
217+ ; CHECK-SVE2p1-SME2-NEXT: adds w8, w0, w8
218+ ; CHECK-SVE2p1-SME2-NEXT: csinv w8, w8, wzr, lo
219+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p0.b, p1.b }, x10, x9
220+ ; CHECK-SVE2p1-SME2-NEXT: whilelo { p2.b, p3.b }, x8, x9
221+ ; CHECK-SVE2p1-SME2-NEXT: b use
222+ %r = call <vscale x 64 x i1 > @llvm.get.active.lane.mask.nxv64i1.i32 (i32 %i , i32 %n )
223+ %v0 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv64i1.i64 (<vscale x 64 x i1 > %r , i64 0 )
224+ %v1 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv64i1.i64 (<vscale x 64 x i1 > %r , i64 16 )
225+ %v2 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv64i1.i64 (<vscale x 64 x i1 > %r , i64 32 )
226+ %v3 = call <vscale x 16 x i1 > @llvm.vector.extract.nxv16i1.nxv64i1.i64 (<vscale x 64 x i1 > %r , i64 48 )
227+ tail call void @use (<vscale x 16 x i1 > %v0 , <vscale x 16 x i1 > %v1 , <vscale x 16 x i1 > %v2 , <vscale x 16 x i1 > %v3 )
228+ ret void
229+ }
230+
; External variadic callee; the test functions visible in this file tail-call it
; with the predicate values they extract (expected as `b use` in the CHECK lines).
154231declare void @use (...)
155232
; Attribute group #0 is referenced on the `define` line of each test function here.
156233attributes #0 = { nounwind }
0 commit comments