 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+disable-unpredicated-ld-st-lower < %s | FileCheck --check-prefixes=COMMON-NO-UPLS-LOWER,NO-UPLS-LOWER %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mcpu=a64fx < %s | FileCheck --check-prefixes=COMMON-NO-UPLS-LOWER,A64FX %s
35
46; LD1B
57
@@ -8,6 +10,12 @@ define <vscale x 16 x i8> @ld1b_lower_bound(ptr %a) {
810; CHECK: // %bb.0:
911; CHECK-NEXT: ldr z0, [x0, #-8, mul vl]
1012; CHECK-NEXT: ret
13+ ;
14+ ; COMMON-NO-UPLS-LOWER-LABEL: ld1b_lower_bound:
15+ ; COMMON-NO-UPLS-LOWER: // %bb.0:
16+ ; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b
17+ ; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, #-8, mul vl]
18+ ; COMMON-NO-UPLS-LOWER-NEXT: ret
1119 %base = getelementptr <vscale x 16 x i8 >, ptr %a , i64 -8
1220 %load = load <vscale x 16 x i8 >, ptr %base
1321 ret <vscale x 16 x i8 > %load
@@ -18,6 +26,12 @@ define <vscale x 16 x i8> @ld1b_inbound(ptr %a) {
1826; CHECK: // %bb.0:
1927; CHECK-NEXT: ldr z0, [x0, #2, mul vl]
2028; CHECK-NEXT: ret
29+ ;
30+ ; COMMON-NO-UPLS-LOWER-LABEL: ld1b_inbound:
31+ ; COMMON-NO-UPLS-LOWER: // %bb.0:
32+ ; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b
33+ ; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, #2, mul vl]
34+ ; COMMON-NO-UPLS-LOWER-NEXT: ret
2135 %base = getelementptr <vscale x 16 x i8 >, ptr %a , i64 2
2236 %load = load <vscale x 16 x i8 >, ptr %base
2337 ret <vscale x 16 x i8 > %load
@@ -28,6 +42,12 @@ define <vscale x 16 x i8> @ld1b_upper_bound(ptr %a) {
2842; CHECK: // %bb.0:
2943; CHECK-NEXT: ldr z0, [x0, #7, mul vl]
3044; CHECK-NEXT: ret
45+ ;
46+ ; COMMON-NO-UPLS-LOWER-LABEL: ld1b_upper_bound:
47+ ; COMMON-NO-UPLS-LOWER: // %bb.0:
48+ ; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b
49+ ; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, #7, mul vl]
50+ ; COMMON-NO-UPLS-LOWER-NEXT: ret
3151 %base = getelementptr <vscale x 16 x i8 >, ptr %a , i64 7
3252 %load = load <vscale x 16 x i8 >, ptr %base
3353 ret <vscale x 16 x i8 > %load
@@ -38,6 +58,13 @@ define <vscale x 16 x i8> @ld1b_out_of_upper_bound(ptr %a) {
3858; CHECK: // %bb.0:
3959; CHECK-NEXT: ldr z0, [x0, #8, mul vl]
4060; CHECK-NEXT: ret
61+ ;
62+ ; COMMON-NO-UPLS-LOWER-LABEL: ld1b_out_of_upper_bound:
63+ ; COMMON-NO-UPLS-LOWER: // %bb.0:
64+ ; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b
65+ ; COMMON-NO-UPLS-LOWER-NEXT: rdvl x8, #8
66+ ; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
67+ ; COMMON-NO-UPLS-LOWER-NEXT: ret
4168 %base = getelementptr <vscale x 16 x i8 >, ptr %a , i64 8
4269 %load = load <vscale x 16 x i8 >, ptr %base
4370 ret <vscale x 16 x i8 > %load
@@ -48,6 +75,13 @@ define <vscale x 16 x i8> @ld1b_out_of_lower_bound(ptr %a) {
4875; CHECK: // %bb.0:
4976; CHECK-NEXT: ldr z0, [x0, #-9, mul vl]
5077; CHECK-NEXT: ret
78+ ;
79+ ; COMMON-NO-UPLS-LOWER-LABEL: ld1b_out_of_lower_bound:
80+ ; COMMON-NO-UPLS-LOWER: // %bb.0:
81+ ; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.b
82+ ; COMMON-NO-UPLS-LOWER-NEXT: rdvl x8, #-9
83+ ; COMMON-NO-UPLS-LOWER-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
84+ ; COMMON-NO-UPLS-LOWER-NEXT: ret
5185 %base = getelementptr <vscale x 16 x i8 >, ptr %a , i64 -9
5286 %load = load <vscale x 16 x i8 >, ptr %base
5387 ret <vscale x 16 x i8 > %load
@@ -60,6 +94,12 @@ define <vscale x 8 x i16> @ld1h_inbound(ptr %a) {
6094; CHECK: // %bb.0:
6195; CHECK-NEXT: ldr z0, [x0, #-2, mul vl]
6296; CHECK-NEXT: ret
97+ ;
98+ ; COMMON-NO-UPLS-LOWER-LABEL: ld1h_inbound:
99+ ; COMMON-NO-UPLS-LOWER: // %bb.0:
100+ ; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.h
101+ ; COMMON-NO-UPLS-LOWER-NEXT: ld1h { z0.h }, p0/z, [x0, #-2, mul vl]
102+ ; COMMON-NO-UPLS-LOWER-NEXT: ret
63103 %base = getelementptr <vscale x 8 x i16 >, ptr %a , i64 -2
64104 %load = load <vscale x 8 x i16 >, ptr %base
65105 ret <vscale x 8 x i16 > %load
@@ -72,6 +112,12 @@ define <vscale x 4 x i32> @ld1s_inbound(ptr %a) {
72112; CHECK: // %bb.0:
73113; CHECK-NEXT: ldr z0, [x0, #4, mul vl]
74114; CHECK-NEXT: ret
115+ ;
116+ ; COMMON-NO-UPLS-LOWER-LABEL: ld1s_inbound:
117+ ; COMMON-NO-UPLS-LOWER: // %bb.0:
118+ ; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.s
119+ ; COMMON-NO-UPLS-LOWER-NEXT: ld1w { z0.s }, p0/z, [x0, #4, mul vl]
120+ ; COMMON-NO-UPLS-LOWER-NEXT: ret
75121 %base = getelementptr <vscale x 4 x i32 >, ptr %a , i64 4
76122 %load = load <vscale x 4 x i32 >, ptr %base
77123 ret <vscale x 4 x i32 > %load
@@ -84,6 +130,12 @@ define <vscale x 2 x i64> @ld1d_inbound(ptr %a) {
84130; CHECK: // %bb.0:
85131; CHECK-NEXT: ldr z0, [x0, #6, mul vl]
86132; CHECK-NEXT: ret
133+ ;
134+ ; COMMON-NO-UPLS-LOWER-LABEL: ld1d_inbound:
135+ ; COMMON-NO-UPLS-LOWER: // %bb.0:
136+ ; COMMON-NO-UPLS-LOWER-NEXT: ptrue p0.d
137+ ; COMMON-NO-UPLS-LOWER-NEXT: ld1d { z0.d }, p0/z, [x0, #6, mul vl]
138+ ; COMMON-NO-UPLS-LOWER-NEXT: ret
87139 %base = getelementptr <vscale x 2 x i64 >, ptr %a , i64 6
88140 %load = load <vscale x 2 x i64 >, ptr %base
89141 ret <vscale x 2 x i64 > %load
@@ -97,6 +149,22 @@ define void @load_nxv6f16(ptr %a) {
97149; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
98150; CHECK-NEXT: ld1h { z0.s }, p1/z, [x0]
99151; CHECK-NEXT: ret
152+ ;
153+ ; NO-UPLS-LOWER-LABEL: load_nxv6f16:
154+ ; NO-UPLS-LOWER: // %bb.0:
155+ ; NO-UPLS-LOWER-NEXT: ptrue p0.d
156+ ; NO-UPLS-LOWER-NEXT: ptrue p1.s
157+ ; NO-UPLS-LOWER-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
158+ ; NO-UPLS-LOWER-NEXT: ld1h { z0.s }, p1/z, [x0]
159+ ; NO-UPLS-LOWER-NEXT: ret
160+ ;
161+ ; A64FX-LABEL: load_nxv6f16:
162+ ; A64FX: // %bb.0:
163+ ; A64FX-NEXT: ptrue p0.d
164+ ; A64FX-NEXT: ld1h { z0.d }, p0/z, [x0, #2, mul vl]
165+ ; A64FX-NEXT: ptrue p0.s
166+ ; A64FX-NEXT: ld1h { z0.s }, p0/z, [x0]
167+ ; A64FX-NEXT: ret
100168 %val = load volatile <vscale x 6 x half >, ptr %a
101169 ret void
102170}
@@ -108,6 +176,22 @@ define void @load_nxv6f32(ptr %a) {
108176; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, #2, mul vl]
109177; CHECK-NEXT: ldr z0, [x0]
110178; CHECK-NEXT: ret
179+ ;
180+ ; NO-UPLS-LOWER-LABEL: load_nxv6f32:
181+ ; NO-UPLS-LOWER: // %bb.0:
182+ ; NO-UPLS-LOWER-NEXT: ptrue p0.d
183+ ; NO-UPLS-LOWER-NEXT: ptrue p1.s
184+ ; NO-UPLS-LOWER-NEXT: ld1w { z0.d }, p0/z, [x0, #2, mul vl]
185+ ; NO-UPLS-LOWER-NEXT: ld1w { z0.s }, p1/z, [x0]
186+ ; NO-UPLS-LOWER-NEXT: ret
187+ ;
188+ ; A64FX-LABEL: load_nxv6f32:
189+ ; A64FX: // %bb.0:
190+ ; A64FX-NEXT: ptrue p0.d
191+ ; A64FX-NEXT: ld1w { z0.d }, p0/z, [x0, #2, mul vl]
192+ ; A64FX-NEXT: ptrue p0.s
193+ ; A64FX-NEXT: ld1w { z0.s }, p0/z, [x0]
194+ ; A64FX-NEXT: ret
111195 %val = load volatile <vscale x 6 x float >, ptr %a
112196 ret void
113197}
@@ -119,6 +203,22 @@ define void @load_nxv12f16(ptr %a) {
119203; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, #2, mul vl]
120204; CHECK-NEXT: ldr z0, [x0]
121205; CHECK-NEXT: ret
206+ ;
207+ ; NO-UPLS-LOWER-LABEL: load_nxv12f16:
208+ ; NO-UPLS-LOWER: // %bb.0:
209+ ; NO-UPLS-LOWER-NEXT: ptrue p0.s
210+ ; NO-UPLS-LOWER-NEXT: ptrue p1.h
211+ ; NO-UPLS-LOWER-NEXT: ld1h { z0.s }, p0/z, [x0, #2, mul vl]
212+ ; NO-UPLS-LOWER-NEXT: ld1h { z0.h }, p1/z, [x0]
213+ ; NO-UPLS-LOWER-NEXT: ret
214+ ;
215+ ; A64FX-LABEL: load_nxv12f16:
216+ ; A64FX: // %bb.0:
217+ ; A64FX-NEXT: ptrue p0.s
218+ ; A64FX-NEXT: ld1h { z0.s }, p0/z, [x0, #2, mul vl]
219+ ; A64FX-NEXT: ptrue p0.h
220+ ; A64FX-NEXT: ld1h { z0.h }, p0/z, [x0]
221+ ; A64FX-NEXT: ret
122222 %val = load volatile <vscale x 12 x half >, ptr %a
123223 ret void
124224}
0 commit comments