11; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2- ; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s
2+ ; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BASE
3+ ; RUN: llc -mtriple=aarch64 -aarch64-sve-vector-bits-min=256 -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VL256
34
45;; Full SVE vectors (supported with +sve)
56
@@ -42,7 +43,6 @@ define void @test_compressstore_nxv4f32(ptr %p, <vscale x 4 x float> %vec, <vsca
4243 ret void
4344}
4445
45- ; TODO: Legal and nonstreaming check
4646define void @test_compressstore_nxv2f64 (ptr %p , <vscale x 2 x double > %vec , <vscale x 2 x i1 > %mask ) {
4747; CHECK-LABEL: test_compressstore_nxv2f64:
4848; CHECK: // %bb.0:
@@ -56,6 +56,21 @@ define void @test_compressstore_nxv2f64(ptr %p, <vscale x 2 x double> %vec, <vsc
5656 ret void
5757}
5858
59+ ;; Unpacked SVE vector types
60+
;; Compress-store of <vscale x 2 x float> under a <vscale x 2 x i1> mask.
;; The assertions use the shared CHECK prefix, so both RUN lines must produce
;; the same output: the active lanes are compacted and counted on .d elements,
;; a whilelo predicate is built from that count, and the result is written
;; with a word store (st1w) from z0.d.
61+ define void @test_compressstore_nxv2f32 (ptr %p , <vscale x 2 x float > %vec , <vscale x 2 x i1 > %mask ) {
62+ ; CHECK-LABEL: test_compressstore_nxv2f32:
63+ ; CHECK: // %bb.0:
64+ ; CHECK-NEXT: ptrue p1.d
65+ ; CHECK-NEXT: compact z0.d, p0, z0.d
66+ ; CHECK-NEXT: cntp x8, p1, p0.d
67+ ; CHECK-NEXT: whilelo p0.d, xzr, x8
68+ ; CHECK-NEXT: st1w { z0.d }, p0, [x0]
69+ ; CHECK-NEXT: ret
70+ tail call void @llvm.masked.compressstore.nxv2f32 (<vscale x 2 x float > %vec , ptr align 4 %p , <vscale x 2 x i1 > %mask )
71+ ret void
72+ }
73+
5974;; SVE vector types promoted to 32/64-bit (non-exhaustive)
6075
6176define void @test_compressstore_nxv2i8 (ptr %p , <vscale x 2 x i8 > %vec , <vscale x 2 x i1 > %mask ) {
@@ -86,8 +101,8 @@ define void @test_compressstore_nxv4i16(ptr %p, <vscale x 4 x i16> %vec, <vscale
86101
87102;; NEON vector types (promoted to SVE)
88103
89- define void @test_compressstore_v2f32 (ptr %p , <2 x double > %vec , <2 x i1 > %mask ) {
90- ; CHECK-LABEL: test_compressstore_v2f32 :
104+ define void @test_compressstore_v2f64 (ptr %p , <2 x double > %vec , <2 x i1 > %mask ) {
105+ ; CHECK-LABEL: test_compressstore_v2f64 :
91106; CHECK: // %bb.0:
92107; CHECK-NEXT: ushll v1.2d, v1.2s, #0
93108; CHECK-NEXT: ptrue p0.d, vl2
@@ -139,3 +154,133 @@ define void @test_compressstore_v2i64(ptr %p, <2 x i64> %vec, <2 x i1> %mask) {
139154 tail call void @llvm.masked.compressstore.v2i64 (<2 x i64 > %vec , ptr align 8 %p , <2 x i1 > %mask )
140155 ret void
141156}
157+
;; Compress-store of a fixed-length <8 x i32> vector under an <8 x i1> mask.
;; The two RUN configurations are expected to produce different code:
;;   CHECK-BASE  (plain -mattr=+sve): the mask is reduced to a bitmask in w8,
;;               each of the eight bits is tested with tbnz/tbz, and selected
;;               lanes are stored one at a time with a post-incremented st1.
;;   CHECK-VL256 (-aarch64-sve-vector-bits-min=256): the q0/q1 halves are
;;               spliced into z0, then the compact/cntp/whilelo/st1w sequence
;;               is used.
158+ define void @test_compressstore_v8i32 (ptr %p , <8 x i32 > %vec , <8 x i1 > %mask ) {
159+ ; CHECK-BASE-LABEL: test_compressstore_v8i32:
160+ ; CHECK-BASE: // %bb.0:
161+ ; CHECK-BASE-NEXT: shl v2.8b, v2.8b, #7
162+ ; CHECK-BASE-NEXT: adrp x8, .LCPI10_0
163+ ; CHECK-BASE-NEXT: ldr d3, [x8, :lo12:.LCPI10_0]
164+ ; CHECK-BASE-NEXT: cmlt v2.8b, v2.8b, #0
165+ ; CHECK-BASE-NEXT: and v2.8b, v2.8b, v3.8b
166+ ; CHECK-BASE-NEXT: addv b2, v2.8b
167+ ; CHECK-BASE-NEXT: fmov w8, s2
168+ ; CHECK-BASE-NEXT: tbnz w8, #0, .LBB10_9
169+ ; CHECK-BASE-NEXT: // %bb.1: // %else
170+ ; CHECK-BASE-NEXT: tbnz w8, #1, .LBB10_10
171+ ; CHECK-BASE-NEXT: .LBB10_2: // %else2
172+ ; CHECK-BASE-NEXT: tbnz w8, #2, .LBB10_11
173+ ; CHECK-BASE-NEXT: .LBB10_3: // %else5
174+ ; CHECK-BASE-NEXT: tbnz w8, #3, .LBB10_12
175+ ; CHECK-BASE-NEXT: .LBB10_4: // %else8
176+ ; CHECK-BASE-NEXT: tbnz w8, #4, .LBB10_13
177+ ; CHECK-BASE-NEXT: .LBB10_5: // %else11
178+ ; CHECK-BASE-NEXT: tbnz w8, #5, .LBB10_14
179+ ; CHECK-BASE-NEXT: .LBB10_6: // %else14
180+ ; CHECK-BASE-NEXT: tbnz w8, #6, .LBB10_15
181+ ; CHECK-BASE-NEXT: .LBB10_7: // %else17
182+ ; CHECK-BASE-NEXT: tbnz w8, #7, .LBB10_16
183+ ; CHECK-BASE-NEXT: .LBB10_8: // %else20
184+ ; CHECK-BASE-NEXT: ret
185+ ; CHECK-BASE-NEXT: .LBB10_9: // %cond.store
186+ ; CHECK-BASE-NEXT: st1 { v0.s }[0], [x0], #4
187+ ; CHECK-BASE-NEXT: tbz w8, #1, .LBB10_2
188+ ; CHECK-BASE-NEXT: .LBB10_10: // %cond.store1
189+ ; CHECK-BASE-NEXT: st1 { v0.s }[1], [x0], #4
190+ ; CHECK-BASE-NEXT: tbz w8, #2, .LBB10_3
191+ ; CHECK-BASE-NEXT: .LBB10_11: // %cond.store4
192+ ; CHECK-BASE-NEXT: st1 { v0.s }[2], [x0], #4
193+ ; CHECK-BASE-NEXT: tbz w8, #3, .LBB10_4
194+ ; CHECK-BASE-NEXT: .LBB10_12: // %cond.store7
195+ ; CHECK-BASE-NEXT: st1 { v0.s }[3], [x0], #4
196+ ; CHECK-BASE-NEXT: tbz w8, #4, .LBB10_5
197+ ; CHECK-BASE-NEXT: .LBB10_13: // %cond.store10
198+ ; CHECK-BASE-NEXT: st1 { v1.s }[0], [x0], #4
199+ ; CHECK-BASE-NEXT: tbz w8, #5, .LBB10_6
200+ ; CHECK-BASE-NEXT: .LBB10_14: // %cond.store13
201+ ; CHECK-BASE-NEXT: st1 { v1.s }[1], [x0], #4
202+ ; CHECK-BASE-NEXT: tbz w8, #6, .LBB10_7
203+ ; CHECK-BASE-NEXT: .LBB10_15: // %cond.store16
204+ ; CHECK-BASE-NEXT: st1 { v1.s }[2], [x0], #4
205+ ; CHECK-BASE-NEXT: tbz w8, #7, .LBB10_8
206+ ; CHECK-BASE-NEXT: .LBB10_16: // %cond.store19
207+ ; CHECK-BASE-NEXT: st1 { v1.s }[3], [x0]
208+ ; CHECK-BASE-NEXT: ret
209+ ;
210+ ; CHECK-VL256-LABEL: test_compressstore_v8i32:
211+ ; CHECK-VL256: // %bb.0:
212+ ; CHECK-VL256-NEXT: // kill: def $d2 killed $d2 def $z2
213+ ; CHECK-VL256-NEXT: ptrue p0.s, vl8
214+ ; CHECK-VL256-NEXT: // kill: def $q0 killed $q0 def $z0
215+ ; CHECK-VL256-NEXT: // kill: def $q1 killed $q1 def $z1
216+ ; CHECK-VL256-NEXT: uunpklo z2.h, z2.b
217+ ; CHECK-VL256-NEXT: ptrue p1.s, vl4
218+ ; CHECK-VL256-NEXT: splice z0.s, p1, z0.s, z1.s
219+ ; CHECK-VL256-NEXT: ptrue p1.s
220+ ; CHECK-VL256-NEXT: uunpklo z2.s, z2.h
221+ ; CHECK-VL256-NEXT: lsl z2.s, z2.s, #31
222+ ; CHECK-VL256-NEXT: asr z2.s, z2.s, #31
223+ ; CHECK-VL256-NEXT: cmpne p0.s, p0/z, z2.s, #0
224+ ; CHECK-VL256-NEXT: cntp x8, p1, p0.s
225+ ; CHECK-VL256-NEXT: compact z0.s, p0, z0.s
226+ ; CHECK-VL256-NEXT: whilelo p0.s, xzr, x8
227+ ; CHECK-VL256-NEXT: st1w { z0.s }, p0, [x0]
228+ ; CHECK-VL256-NEXT: ret
229+ tail call void @llvm.masked.compressstore.v8i32 (<8 x i32 > %vec , ptr align 4 %p , <8 x i1 > %mask )
230+ ret void
231+ }
232+
;; Compress-store of a fixed-length <4 x i64> vector under a <4 x i1> mask.
;; The two RUN configurations are expected to produce different code:
;;   CHECK-BASE  (plain -mattr=+sve): the mask is reduced to a bitmask in w8,
;;               each of the four bits is tested with tbnz/tbz, and selected
;;               lanes are stored one at a time with a post-incremented st1.
;;   CHECK-VL256 (-aarch64-sve-vector-bits-min=256): the q0/q1 halves are
;;               spliced into z0, then the compact/cntp/whilelo/st1d sequence
;;               is used.
233+ define void @test_compressstore_v4i64 (ptr %p , <4 x i64 > %vec , <4 x i1 > %mask ) {
234+ ; CHECK-BASE-LABEL: test_compressstore_v4i64:
235+ ; CHECK-BASE: // %bb.0:
236+ ; CHECK-BASE-NEXT: shl v2.4h, v2.4h, #15
237+ ; CHECK-BASE-NEXT: adrp x8, .LCPI11_0
238+ ; CHECK-BASE-NEXT: ldr d3, [x8, :lo12:.LCPI11_0]
239+ ; CHECK-BASE-NEXT: cmlt v2.4h, v2.4h, #0
240+ ; CHECK-BASE-NEXT: and v2.8b, v2.8b, v3.8b
241+ ; CHECK-BASE-NEXT: addv h2, v2.4h
242+ ; CHECK-BASE-NEXT: fmov w8, s2
243+ ; CHECK-BASE-NEXT: tbnz w8, #0, .LBB11_5
244+ ; CHECK-BASE-NEXT: // %bb.1: // %else
245+ ; CHECK-BASE-NEXT: tbnz w8, #1, .LBB11_6
246+ ; CHECK-BASE-NEXT: .LBB11_2: // %else2
247+ ; CHECK-BASE-NEXT: tbnz w8, #2, .LBB11_7
248+ ; CHECK-BASE-NEXT: .LBB11_3: // %else5
249+ ; CHECK-BASE-NEXT: tbnz w8, #3, .LBB11_8
250+ ; CHECK-BASE-NEXT: .LBB11_4: // %else8
251+ ; CHECK-BASE-NEXT: ret
252+ ; CHECK-BASE-NEXT: .LBB11_5: // %cond.store
253+ ; CHECK-BASE-NEXT: st1 { v0.d }[0], [x0], #8
254+ ; CHECK-BASE-NEXT: tbz w8, #1, .LBB11_2
255+ ; CHECK-BASE-NEXT: .LBB11_6: // %cond.store1
256+ ; CHECK-BASE-NEXT: st1 { v0.d }[1], [x0], #8
257+ ; CHECK-BASE-NEXT: tbz w8, #2, .LBB11_3
258+ ; CHECK-BASE-NEXT: .LBB11_7: // %cond.store4
259+ ; CHECK-BASE-NEXT: st1 { v1.d }[0], [x0], #8
260+ ; CHECK-BASE-NEXT: tbz w8, #3, .LBB11_4
261+ ; CHECK-BASE-NEXT: .LBB11_8: // %cond.store7
262+ ; CHECK-BASE-NEXT: st1 { v1.d }[1], [x0]
263+ ; CHECK-BASE-NEXT: ret
264+ ;
265+ ; CHECK-VL256-LABEL: test_compressstore_v4i64:
266+ ; CHECK-VL256: // %bb.0:
267+ ; CHECK-VL256-NEXT: // kill: def $d2 killed $d2 def $z2
268+ ; CHECK-VL256-NEXT: ptrue p0.d, vl4
269+ ; CHECK-VL256-NEXT: // kill: def $q0 killed $q0 def $z0
270+ ; CHECK-VL256-NEXT: // kill: def $q1 killed $q1 def $z1
271+ ; CHECK-VL256-NEXT: uunpklo z2.s, z2.h
272+ ; CHECK-VL256-NEXT: ptrue p1.d, vl2
273+ ; CHECK-VL256-NEXT: splice z0.d, p1, z0.d, z1.d
274+ ; CHECK-VL256-NEXT: ptrue p1.d
275+ ; CHECK-VL256-NEXT: uunpklo z2.d, z2.s
276+ ; CHECK-VL256-NEXT: lsl z2.d, z2.d, #63
277+ ; CHECK-VL256-NEXT: asr z2.d, z2.d, #63
278+ ; CHECK-VL256-NEXT: cmpne p0.d, p0/z, z2.d, #0
279+ ; CHECK-VL256-NEXT: cntp x8, p1, p0.d
280+ ; CHECK-VL256-NEXT: compact z0.d, p0, z0.d
281+ ; CHECK-VL256-NEXT: whilelo p0.d, xzr, x8
282+ ; CHECK-VL256-NEXT: st1d { z0.d }, p0, [x0]
283+ ; CHECK-VL256-NEXT: ret
284+ tail call void @llvm.masked.compressstore.v4i64 (<4 x i64 > %vec , ptr align 8 %p , <4 x i1 > %mask )
285+ ret void
286+ }
0 commit comments