1- ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
1+ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --filter-out "kill:" --version 4
22; RUN: llc -mattr=+sve < %s | FileCheck %s -check-prefix CHECK-SVE
33; RUN: llc -mattr=+sve2p1 < %s | FileCheck %s -check-prefix CHECK-SVE2p1
44target triple = "aarch64-linux"
@@ -7,18 +7,18 @@ target triple = "aarch64-linux"
77
88define void @test_2x8bit_mask_with_32bit_index_and_trip_count (i32 %i , i32 %n ) #0 {
99; CHECK-SVE-LABEL: test_2x8bit_mask_with_32bit_index_and_trip_count:
10- ; CHECK-SVE: // %bb.0:
11- ; CHECK-SVE-NEXT : whilelo p1.b, w0, w1
12- ; CHECK-SVE-NEXT : punpklo p0.h, p1.b
13- ; CHECK-SVE-NEXT : punpkhi p1.h, p1.b
14- ; CHECK-SVE-NEXT : b use
10+ ; CHECK-SVE: // %bb.0:
11+ ; CHECK-SVE: whilelo p1.b, w0, w1
12+ ; CHECK-SVE: punpklo p0.h, p1.b
13+ ; CHECK-SVE: punpkhi p1.h, p1.b
14+ ; CHECK-SVE: b use
1515;
1616; CHECK-SVE2p1-LABEL: test_2x8bit_mask_with_32bit_index_and_trip_count:
17- ; CHECK-SVE2p1: // %bb.0:
18- ; CHECK-SVE2p1-NEXT : mov w8, w1
19- ; CHECK-SVE2p1-NEXT : mov w9, w0
20- ; CHECK-SVE2p1-NEXT : whilelo { p0.h, p1.h }, x9, x8
21- ; CHECK-SVE2p1-NEXT : b use
17+ ; CHECK-SVE2p1: // %bb.0:
18+ ; CHECK-SVE2p1: mov w8, w1
19+ ; CHECK-SVE2p1: mov w9, w0
20+ ; CHECK-SVE2p1: whilelo { p0.h, p1.h }, x9, x8
21+ ; CHECK-SVE2p1: b use
2222 %r = call <vscale x 16 x i1 > @llvm.get.active.lane.mask.nxv16i1.i32 (i32 %i , i32 %n )
2323 %v0 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 0 )
2424 %v1 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 8 )
@@ -28,16 +28,16 @@ define void @test_2x8bit_mask_with_32bit_index_and_trip_count(i32 %i, i32 %n) #0
2828
2929define void @test_2x8bit_mask_with_64bit_index_and_trip_count (i64 %i , i64 %n ) #0 {
3030; CHECK-SVE-LABEL: test_2x8bit_mask_with_64bit_index_and_trip_count:
31- ; CHECK-SVE: // %bb.0:
32- ; CHECK-SVE-NEXT : whilelo p1.b, x0, x1
33- ; CHECK-SVE-NEXT : punpklo p0.h, p1.b
34- ; CHECK-SVE-NEXT : punpkhi p1.h, p1.b
35- ; CHECK-SVE-NEXT : b use
31+ ; CHECK-SVE: // %bb.0:
32+ ; CHECK-SVE: whilelo p1.b, x0, x1
33+ ; CHECK-SVE: punpklo p0.h, p1.b
34+ ; CHECK-SVE: punpkhi p1.h, p1.b
35+ ; CHECK-SVE: b use
3636;
3737; CHECK-SVE2p1-LABEL: test_2x8bit_mask_with_64bit_index_and_trip_count:
38- ; CHECK-SVE2p1: // %bb.0:
39- ; CHECK-SVE2p1-NEXT : whilelo { p0.h, p1.h }, x0, x1
40- ; CHECK-SVE2p1-NEXT : b use
38+ ; CHECK-SVE2p1: // %bb.0:
39+ ; CHECK-SVE2p1: whilelo { p0.h, p1.h }, x0, x1
40+ ; CHECK-SVE2p1: b use
4141 %r = call <vscale x 16 x i1 > @llvm.get.active.lane.mask.nxv16i1.i64 (i64 %i , i64 %n )
4242 %v0 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 0 )
4343 %v1 = call <vscale x 8 x i1 > @llvm.vector.extract.nxv8i1.nxv16i1.i64 (<vscale x 16 x i1 > %r , i64 8 )
@@ -47,18 +47,18 @@ define void @test_2x8bit_mask_with_64bit_index_and_trip_count(i64 %i, i64 %n) #0
4747
4848define void @test_edge_case_2x1bit_mask (i64 %i , i64 %n ) #0 {
4949; CHECK-SVE-LABEL: test_edge_case_2x1bit_mask:
50- ; CHECK-SVE: // %bb.0:
51- ; CHECK-SVE-NEXT : whilelo p1.d, x0, x1
52- ; CHECK-SVE-NEXT : punpklo p0.h, p1.b
53- ; CHECK-SVE-NEXT : punpkhi p1.h, p1.b
54- ; CHECK-SVE-NEXT : b use
50+ ; CHECK-SVE: // %bb.0:
51+ ; CHECK-SVE: whilelo p1.d, x0, x1
52+ ; CHECK-SVE: punpklo p0.h, p1.b
53+ ; CHECK-SVE: punpkhi p1.h, p1.b
54+ ; CHECK-SVE: b use
5555;
5656; CHECK-SVE2p1-LABEL: test_edge_case_2x1bit_mask:
57- ; CHECK-SVE2p1: // %bb.0:
58- ; CHECK-SVE2p1-NEXT : whilelo p1.d, x0, x1
59- ; CHECK-SVE2p1-NEXT : punpklo p0.h, p1.b
60- ; CHECK-SVE2p1-NEXT : punpkhi p1.h, p1.b
61- ; CHECK-SVE2p1-NEXT : b use
57+ ; CHECK-SVE2p1: // %bb.0:
58+ ; CHECK-SVE2p1: whilelo p1.d, x0, x1
59+ ; CHECK-SVE2p1: punpklo p0.h, p1.b
60+ ; CHECK-SVE2p1: punpkhi p1.h, p1.b
61+ ; CHECK-SVE2p1: b use
6262 %r = call <vscale x 2 x i1 > @llvm.get.active.lane.mask.nxv2i1.i64 (i64 %i , i64 %n )
6363 %v0 = call <vscale x 1 x i1 > @llvm.vector.extract.nxv1i1.nxv2i1.i64 (<vscale x 2 x i1 > %r , i64 0 )
6464 %v1 = call <vscale x 1 x i1 > @llvm.vector.extract.nxv1i1.nxv2i1.i64 (<vscale x 2 x i1 > %r , i64 1 )
@@ -68,49 +68,85 @@ define void @test_edge_case_2x1bit_mask(i64 %i, i64 %n) #0 {
6868
; Splits an nxv4i1 active-lane mask into two nxv2i1 pieces (element indices 0
; and 2) and passes both to @use. The SVE2p1 checks expect one paired
; `whilelo { p0.d, p1.d }`; the SVE checks expect a single-predicate whilelo
; followed by punpklo/punpkhi.
6969define void @test_boring_case_2x2bit_mask (i64 %i , i64 %n ) #0 {
7070; CHECK-SVE-LABEL: test_boring_case_2x2bit_mask:
71- ; CHECK-SVE: // %bb.0:
72- ; CHECK-SVE-NEXT : whilelo p1.s, x0, x1
73- ; CHECK-SVE-NEXT : punpklo p0.h, p1.b
74- ; CHECK-SVE-NEXT : punpkhi p1.h, p1.b
75- ; CHECK-SVE-NEXT : b use
71+ ; CHECK-SVE: // %bb.0:
72+ ; CHECK-SVE: whilelo p1.s, x0, x1
73+ ; CHECK-SVE: punpklo p0.h, p1.b
74+ ; CHECK-SVE: punpkhi p1.h, p1.b
75+ ; CHECK-SVE: b use
7676;
7777; CHECK-SVE2p1-LABEL: test_boring_case_2x2bit_mask:
78- ; CHECK-SVE2p1: // %bb.0:
79- ; CHECK-SVE2p1-NEXT : whilelo { p0.d, p1.d }, x0, x1
80- ; CHECK-SVE2p1-NEXT : b use
78+ ; CHECK-SVE2p1: // %bb.0:
79+ ; CHECK-SVE2p1: whilelo { p0.d, p1.d }, x0, x1
80+ ; CHECK-SVE2p1: b use
; %r is the full-width lane mask for (%i, %n); %v0 and %v1 are the nxv2i1
; sub-vectors extracted from it at indices 0 and 2.
8181 %r = call <vscale x 4 x i1 > @llvm.get.active.lane.mask.nxv4i1.i64 (i64 %i , i64 %n )
8282 %v0 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv4i1.i64 (<vscale x 4 x i1 > %r , i64 0 )
8383 %v1 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv4i1.i64 (<vscale x 4 x i1 > %r , i64 2 )
8484 tail call void @use (<vscale x 2 x i1 > %v0 , <vscale x 2 x i1 > %v1 )
8585 ret void
8686}
8787
88-
88+ ;; Negative test for when not extracting exactly two halves of the source vector.
; Extracts two nxv2i1 pieces (element indices 0 and 4) from an nxv8i1
; active-lane mask and passes both to @use. Each piece is a quarter of the
; source width, and the SVE and SVE2p1 checks expect the same sequence: one
; single-predicate whilelo followed by punpklo/punpkhi unpacks (no paired
; `whilelo { ... }` form).
8989define void @test_partial_extract (i64 %i , i64 %n ) #0 {
9090; CHECK-SVE-LABEL: test_partial_extract:
91- ; CHECK-SVE: // %bb.0:
92- ; CHECK-SVE-NEXT : whilelo p0.h, x0, x1
93- ; CHECK-SVE-NEXT : punpklo p1.h, p0.b
94- ; CHECK-SVE-NEXT : punpkhi p2.h, p0.b
95- ; CHECK-SVE-NEXT : punpklo p0.h, p1.b
96- ; CHECK-SVE-NEXT : punpklo p1.h, p2.b
97- ; CHECK-SVE-NEXT : b use
91+ ; CHECK-SVE: // %bb.0:
92+ ; CHECK-SVE: whilelo p0.h, x0, x1
93+ ; CHECK-SVE: punpklo p1.h, p0.b
94+ ; CHECK-SVE: punpkhi p2.h, p0.b
95+ ; CHECK-SVE: punpklo p0.h, p1.b
96+ ; CHECK-SVE: punpklo p1.h, p2.b
97+ ; CHECK-SVE: b use
9898;
9999; CHECK-SVE2p1-LABEL: test_partial_extract:
100- ; CHECK-SVE2p1: // %bb.0:
101- ; CHECK-SVE2p1-NEXT : whilelo p0.h, x0, x1
102- ; CHECK-SVE2p1-NEXT : punpklo p1.h, p0.b
103- ; CHECK-SVE2p1-NEXT : punpkhi p2.h, p0.b
104- ; CHECK-SVE2p1-NEXT : punpklo p0.h, p1.b
105- ; CHECK-SVE2p1-NEXT : punpklo p1.h, p2.b
106- ; CHECK-SVE2p1-NEXT : b use
100+ ; CHECK-SVE2p1: // %bb.0:
101+ ; CHECK-SVE2p1: whilelo p0.h, x0, x1
102+ ; CHECK-SVE2p1: punpklo p1.h, p0.b
103+ ; CHECK-SVE2p1: punpkhi p2.h, p0.b
104+ ; CHECK-SVE2p1: punpklo p0.h, p1.b
105+ ; CHECK-SVE2p1: punpklo p1.h, p2.b
106+ ; CHECK-SVE2p1: b use
; %r is the nxv8i1 lane mask for (%i, %n); %v0 and %v1 are nxv2i1 sub-vectors
; taken at indices 0 and 4, so parts of %r are not extracted at all.
107107 %r = call <vscale x 8 x i1 > @llvm.get.active.lane.mask.nxv8i1.i64 (i64 %i , i64 %n )
108108 %v0 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 0 )
109109 %v1 = call <vscale x 2 x i1 > @llvm.vector.extract.nxv2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 4 )
110110 tail call void @use (<vscale x 2 x i1 > %v0 , <vscale x 2 x i1 > %v1 )
111111 ret void
112112}
113113
114+ ;; Negative test for when extracting a fixed-length vector.
; Extracts two fixed-length <2 x i1> vectors (element indices 0 and 4) from a
; scalable nxv8i1 active-lane mask and passes both to @use. The SVE and SVE2p1
; checks expect the same sequence: a single-predicate whilelo, then the mask
; materialised in z0 and its lanes moved into v0/v1 (no paired
; `whilelo { ... }` form).
115+ define void @test_fixed_extract (i64 %i , i64 %n ) #0 {
116+ ; CHECK-SVE-LABEL: test_fixed_extract:
117+ ; CHECK-SVE: // %bb.0:
118+ ; CHECK-SVE: whilelo p0.h, x0, x1
119+ ; CHECK-SVE: cset w8, mi
120+ ; CHECK-SVE: mov z0.h, p0/z, #1 // =0x1
121+ ; CHECK-SVE: umov w9, v0.h[4]
122+ ; CHECK-SVE: umov w10, v0.h[1]
123+ ; CHECK-SVE: umov w11, v0.h[5]
124+ ; CHECK-SVE: fmov s0, w8
125+ ; CHECK-SVE: fmov s1, w9
126+ ; CHECK-SVE: mov v0.s[1], w10
127+ ; CHECK-SVE: mov v1.s[1], w11
128+ ; CHECK-SVE: b use
129+ ;
130+ ; CHECK-SVE2p1-LABEL: test_fixed_extract:
131+ ; CHECK-SVE2p1: // %bb.0:
132+ ; CHECK-SVE2p1: whilelo p0.h, x0, x1
133+ ; CHECK-SVE2p1: cset w8, mi
134+ ; CHECK-SVE2p1: mov z0.h, p0/z, #1 // =0x1
135+ ; CHECK-SVE2p1: umov w9, v0.h[4]
136+ ; CHECK-SVE2p1: umov w10, v0.h[1]
137+ ; CHECK-SVE2p1: umov w11, v0.h[5]
138+ ; CHECK-SVE2p1: fmov s0, w8
139+ ; CHECK-SVE2p1: fmov s1, w9
140+ ; CHECK-SVE2p1: mov v0.s[1], w10
141+ ; CHECK-SVE2p1: mov v1.s[1], w11
142+ ; CHECK-SVE2p1: b use
; %r is the scalable nxv8i1 lane mask for (%i, %n); %v0 and %v1 are
; fixed-length <2 x i1> results, unlike the scalable extracts in the
; neighbouring tests.
143+ %r = call <vscale x 8 x i1 > @llvm.get.active.lane.mask.nxv8i1.i64 (i64 %i , i64 %n )
144+ %v0 = call <2 x i1 > @llvm.vector.extract.v2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 0 )
145+ %v1 = call <2 x i1 > @llvm.vector.extract.v2i1.nxv8i1.i64 (<vscale x 8 x i1 > %r , i64 4 )
146+ tail call void @use (<2 x i1 > %v0 , <2 x i1 > %v1 )
147+ ret void
148+ }
149+
114150declare void @use (...)
115151
116152attributes #0 = { nounwind }
0 commit comments