@@ -52,6 +52,26 @@ define void @test_compressstore_nxv2f64(ptr %p, <vscale x 2 x double> %vec, <vsc
5252 ret void
5353}
5454
55+ ;; SVE vectors that will be split (types spanning two Z registers, handled as two halves)
56+
; Compress-store of a <vscale x 8 x i32> value through %p under %mask.
; The value arrives in two Z registers (z0, z1), so the expected code below
; processes each half on its own:
;   - punpklo/punpkhi split the incoming mask in p0 into a low half (p0) and a
;     high half (p1).
;   - cntp counts the active lanes of each half (x9 = low half, x8 = high half).
;   - compact packs the active lanes of z0/z1 to the bottom of each register.
;   - whilelo builds store predicates covering exactly x9 and x8 lanes.
;   - The low half is stored at [x0]; the high half is stored right after it,
;     at x0 + x9 * 4 (x9 scaled by the 4-byte element size via lsl #2).
57+ define void @test_compressstore_nxv8i32 (ptr %p , <vscale x 8 x i32 > %vec , <vscale x 8 x i1 > %mask ) {
58+ ; CHECK-LABEL: test_compressstore_nxv8i32:
59+ ; CHECK: // %bb.0:
60+ ; CHECK-NEXT: punpkhi p1.h, p0.b
61+ ; CHECK-NEXT: punpklo p0.h, p0.b
62+ ; CHECK-NEXT: cntp x8, p1, p1.s
63+ ; CHECK-NEXT: compact z1.s, p1, z1.s
64+ ; CHECK-NEXT: cntp x9, p0, p0.s
65+ ; CHECK-NEXT: compact z0.s, p0, z0.s
66+ ; CHECK-NEXT: whilelo p0.s, xzr, x8
67+ ; CHECK-NEXT: whilelo p1.s, xzr, x9
68+ ; CHECK-NEXT: st1w { z1.s }, p0, [x0, x9, lsl #2]
69+ ; CHECK-NEXT: st1w { z0.s }, p1, [x0]
70+ ; CHECK-NEXT: ret
71+ tail call void @llvm.masked.compressstore.nxv8i32 (<vscale x 8 x i32 > %vec , ptr align 4 %p , <vscale x 8 x i1 > %mask )
72+ ret void
73+ }
74+
5575;; Unpacked SVE vector types
5676
5777define void @test_compressstore_nxv2f32 (ptr %p , <vscale x 2 x float > %vec , <vscale x 2 x i1 > %mask ) {
@@ -148,53 +168,29 @@ define void @test_compressstore_v2i64(ptr %p, <2 x i64> %vec, <2 x i1> %mask) {
148168define void @test_compressstore_v8i32 (ptr %p , <8 x i32 > %vec , <8 x i1 > %mask ) {
149169; CHECK-BASE-LABEL: test_compressstore_v8i32:
150170; CHECK-BASE: // %bb.0:
151- ; CHECK-BASE-NEXT: shl v2.8b, v2.8b, #7
152- ; CHECK-BASE-NEXT: adrp x8, .LCPI10_0
153- ; CHECK-BASE-NEXT: ldr d3, [x8, :lo12:.LCPI10_0]
154- ; CHECK-BASE-NEXT: cmlt v2.8b, v2.8b, #0
155- ; CHECK-BASE-NEXT: and v2.8b, v2.8b, v3.8b
156- ; CHECK-BASE-NEXT: addv b2, v2.8b
157- ; CHECK-BASE-NEXT: fmov w8, s2
158- ; CHECK-BASE-NEXT: tbnz w8, #0, .LBB10_9
159- ; CHECK-BASE-NEXT: // %bb.1: // %else
160- ; CHECK-BASE-NEXT: tbnz w8, #1, .LBB10_10
161- ; CHECK-BASE-NEXT: .LBB10_2: // %else2
162- ; CHECK-BASE-NEXT: tbnz w8, #2, .LBB10_11
163- ; CHECK-BASE-NEXT: .LBB10_3: // %else5
164- ; CHECK-BASE-NEXT: tbnz w8, #3, .LBB10_12
165- ; CHECK-BASE-NEXT: .LBB10_4: // %else8
166- ; CHECK-BASE-NEXT: tbnz w8, #4, .LBB10_13
167- ; CHECK-BASE-NEXT: .LBB10_5: // %else11
168- ; CHECK-BASE-NEXT: tbnz w8, #5, .LBB10_14
169- ; CHECK-BASE-NEXT: .LBB10_6: // %else14
170- ; CHECK-BASE-NEXT: tbnz w8, #6, .LBB10_15
171- ; CHECK-BASE-NEXT: .LBB10_7: // %else17
172- ; CHECK-BASE-NEXT: tbnz w8, #7, .LBB10_16
173- ; CHECK-BASE-NEXT: .LBB10_8: // %else20
174- ; CHECK-BASE-NEXT: ret
175- ; CHECK-BASE-NEXT: .LBB10_9: // %cond.store
176- ; CHECK-BASE-NEXT: st1 { v0.s }[0], [x0], #4
177- ; CHECK-BASE-NEXT: tbz w8, #1, .LBB10_2
178- ; CHECK-BASE-NEXT: .LBB10_10: // %cond.store1
179- ; CHECK-BASE-NEXT: st1 { v0.s }[1], [x0], #4
180- ; CHECK-BASE-NEXT: tbz w8, #2, .LBB10_3
181- ; CHECK-BASE-NEXT: .LBB10_11: // %cond.store4
182- ; CHECK-BASE-NEXT: st1 { v0.s }[2], [x0], #4
183- ; CHECK-BASE-NEXT: tbz w8, #3, .LBB10_4
184- ; CHECK-BASE-NEXT: .LBB10_12: // %cond.store7
185- ; CHECK-BASE-NEXT: st1 { v0.s }[3], [x0], #4
186- ; CHECK-BASE-NEXT: tbz w8, #4, .LBB10_5
187- ; CHECK-BASE-NEXT: .LBB10_13: // %cond.store10
188- ; CHECK-BASE-NEXT: st1 { v1.s }[0], [x0], #4
189- ; CHECK-BASE-NEXT: tbz w8, #5, .LBB10_6
190- ; CHECK-BASE-NEXT: .LBB10_14: // %cond.store13
191- ; CHECK-BASE-NEXT: st1 { v1.s }[1], [x0], #4
192- ; CHECK-BASE-NEXT: tbz w8, #6, .LBB10_7
193- ; CHECK-BASE-NEXT: .LBB10_15: // %cond.store16
194- ; CHECK-BASE-NEXT: st1 { v1.s }[2], [x0], #4
195- ; CHECK-BASE-NEXT: tbz w8, #7, .LBB10_8
196- ; CHECK-BASE-NEXT: .LBB10_16: // %cond.store19
197- ; CHECK-BASE-NEXT: st1 { v1.s }[3], [x0]
171+ ; CHECK-BASE-NEXT: // kill: def $q0 killed $q0 def $z0
172+ ; CHECK-BASE-NEXT: zip2 v3.8b, v2.8b, v0.8b
173+ ; CHECK-BASE-NEXT: zip1 v2.8b, v2.8b, v0.8b
174+ ; CHECK-BASE-NEXT: // kill: def $q1 killed $q1 def $z1
175+ ; CHECK-BASE-NEXT: movi v4.4s, #1
176+ ; CHECK-BASE-NEXT: ptrue p0.s, vl4
177+ ; CHECK-BASE-NEXT: ushll v3.4s, v3.4h, #0
178+ ; CHECK-BASE-NEXT: ushll v2.4s, v2.4h, #0
179+ ; CHECK-BASE-NEXT: shl v3.4s, v3.4s, #31
180+ ; CHECK-BASE-NEXT: shl v5.4s, v2.4s, #31
181+ ; CHECK-BASE-NEXT: and v2.16b, v2.16b, v4.16b
182+ ; CHECK-BASE-NEXT: cmpne p1.s, p0/z, z3.s, #0
183+ ; CHECK-BASE-NEXT: cmpne p0.s, p0/z, z5.s, #0
184+ ; CHECK-BASE-NEXT: addv s2, v2.4s
185+ ; CHECK-BASE-NEXT: fmov w10, s2
186+ ; CHECK-BASE-NEXT: cntp x8, p1, p1.s
187+ ; CHECK-BASE-NEXT: compact z1.s, p1, z1.s
188+ ; CHECK-BASE-NEXT: compact z0.s, p0, z0.s
189+ ; CHECK-BASE-NEXT: cntp x9, p0, p0.s
190+ ; CHECK-BASE-NEXT: whilelo p0.s, xzr, x8
191+ ; CHECK-BASE-NEXT: whilelo p1.s, xzr, x9
192+ ; CHECK-BASE-NEXT: st1w { z1.s }, p0, [x0, x10, lsl #2]
193+ ; CHECK-BASE-NEXT: st1w { z0.s }, p1, [x0]
198194; CHECK-BASE-NEXT: ret
199195;
200196; CHECK-VL256-LABEL: test_compressstore_v8i32:
@@ -222,33 +218,28 @@ define void @test_compressstore_v8i32(ptr %p, <8 x i32> %vec, <8 x i1> %mask) {
222218define void @test_compressstore_v4i64 (ptr %p , <4 x i64 > %vec , <4 x i1 > %mask ) {
223219; CHECK-BASE-LABEL: test_compressstore_v4i64:
224220; CHECK-BASE: // %bb.0:
225- ; CHECK-BASE-NEXT: shl v2.4h, v2.4h, #15
226- ; CHECK-BASE-NEXT: adrp x8, .LCPI11_0
227- ; CHECK-BASE-NEXT: ldr d3, [x8, :lo12:.LCPI11_0]
228- ; CHECK-BASE-NEXT: cmlt v2.4h, v2.4h, #0
229- ; CHECK-BASE-NEXT: and v2.8b, v2.8b, v3.8b
230- ; CHECK-BASE-NEXT: addv h2, v2.4h
231- ; CHECK-BASE-NEXT: fmov w8, s2
232- ; CHECK-BASE-NEXT: tbnz w8, #0, .LBB11_5
233- ; CHECK-BASE-NEXT: // %bb.1: // %else
234- ; CHECK-BASE-NEXT: tbnz w8, #1, .LBB11_6
235- ; CHECK-BASE-NEXT: .LBB11_2: // %else2
236- ; CHECK-BASE-NEXT: tbnz w8, #2, .LBB11_7
237- ; CHECK-BASE-NEXT: .LBB11_3: // %else5
238- ; CHECK-BASE-NEXT: tbnz w8, #3, .LBB11_8
239- ; CHECK-BASE-NEXT: .LBB11_4: // %else8
240- ; CHECK-BASE-NEXT: ret
241- ; CHECK-BASE-NEXT: .LBB11_5: // %cond.store
242- ; CHECK-BASE-NEXT: st1 { v0.d }[0], [x0], #8
243- ; CHECK-BASE-NEXT: tbz w8, #1, .LBB11_2
244- ; CHECK-BASE-NEXT: .LBB11_6: // %cond.store1
245- ; CHECK-BASE-NEXT: st1 { v0.d }[1], [x0], #8
246- ; CHECK-BASE-NEXT: tbz w8, #2, .LBB11_3
247- ; CHECK-BASE-NEXT: .LBB11_7: // %cond.store4
248- ; CHECK-BASE-NEXT: st1 { v1.d }[0], [x0], #8
249- ; CHECK-BASE-NEXT: tbz w8, #3, .LBB11_4
250- ; CHECK-BASE-NEXT: .LBB11_8: // %cond.store7
251- ; CHECK-BASE-NEXT: st1 { v1.d }[1], [x0]
221+ ; CHECK-BASE-NEXT: ushll v2.4s, v2.4h, #0
222+ ; CHECK-BASE-NEXT: movi v5.2s, #1
223+ ; CHECK-BASE-NEXT: // kill: def $q1 killed $q1 def $z1
224+ ; CHECK-BASE-NEXT: // kill: def $q0 killed $q0 def $z0
225+ ; CHECK-BASE-NEXT: ptrue p0.d, vl2
226+ ; CHECK-BASE-NEXT: ushll2 v3.2d, v2.4s, #0
227+ ; CHECK-BASE-NEXT: ushll v4.2d, v2.2s, #0
228+ ; CHECK-BASE-NEXT: and v2.8b, v2.8b, v5.8b
229+ ; CHECK-BASE-NEXT: shl v3.2d, v3.2d, #63
230+ ; CHECK-BASE-NEXT: shl v4.2d, v4.2d, #63
231+ ; CHECK-BASE-NEXT: addp v2.2s, v2.2s, v2.2s
232+ ; CHECK-BASE-NEXT: cmpne p1.d, p0/z, z3.d, #0
233+ ; CHECK-BASE-NEXT: cmpne p0.d, p0/z, z4.d, #0
234+ ; CHECK-BASE-NEXT: fmov w10, s2
235+ ; CHECK-BASE-NEXT: cntp x8, p1, p1.d
236+ ; CHECK-BASE-NEXT: compact z1.d, p1, z1.d
237+ ; CHECK-BASE-NEXT: compact z0.d, p0, z0.d
238+ ; CHECK-BASE-NEXT: cntp x9, p0, p0.d
239+ ; CHECK-BASE-NEXT: whilelo p0.d, xzr, x8
240+ ; CHECK-BASE-NEXT: whilelo p1.d, xzr, x9
241+ ; CHECK-BASE-NEXT: st1d { z1.d }, p0, [x0, x10, lsl #3]
242+ ; CHECK-BASE-NEXT: st1d { z0.d }, p1, [x0]
252243; CHECK-BASE-NEXT: ret
253244;
254245; CHECK-VL256-LABEL: test_compressstore_v4i64:
0 commit comments