Skip to content

Commit a980f8a

Browse files
committed
Avoid VECREDUCE_ADD issues
1 parent 02d3507 commit a980f8a

File tree

2 files changed

+21
-26
lines changed

2 files changed

+21
-26
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 9 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -30417,10 +30417,17 @@ SDValue AArch64TargetLowering::LowerMSTORE(SDValue Op,
3041730417
return SDValue();
3041830418

3041930419
EVT MaskVT = Store->getMask().getValueType();
30420-
30420+
EVT MaskExtVT = getPromotedVTForPredicate(MaskVT);
30421+
EVT MaskReduceVT = MaskExtVT.getScalarType();
3042130422
SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
30423+
30424+
SDValue MaskExt =
30425+
DAG.getNode(ISD::ZERO_EXTEND, DL, MaskExtVT, Store->getMask());
3042230426
SDValue CntActive =
30423-
DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i64, Store->getMask());
30427+
DAG.getNode(ISD::VECREDUCE_ADD, DL, MaskReduceVT, MaskExt);
30428+
if (MaskReduceVT != MVT::i64)
30429+
CntActive = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, CntActive);
30430+
3042430431
SDValue CompressedValue =
3042530432
DAG.getNode(ISD::VECTOR_COMPRESS, DL, VT, Store->getValue(),
3042630433
Store->getMask(), DAG.getPOISON(VT));

llvm/test/CodeGen/AArch64/sve-masked-compressstore.ll

Lines changed: 12 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -7,9 +7,8 @@
77
define void @test_compressstore_nxv4i32(ptr %p, <vscale x 4 x i32> %vec, <vscale x 4 x i1> %mask) {
88
; CHECK-LABEL: test_compressstore_nxv4i32:
99
; CHECK: // %bb.0:
10-
; CHECK-NEXT: ptrue p1.s
10+
; CHECK-NEXT: cntp x8, p0, p0.s
1111
; CHECK-NEXT: compact z0.s, p0, z0.s
12-
; CHECK-NEXT: cntp x8, p1, p0.s
1312
; CHECK-NEXT: whilelo p0.s, xzr, x8
1413
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
1514
; CHECK-NEXT: ret
@@ -20,9 +19,8 @@ define void @test_compressstore_nxv4i32(ptr %p, <vscale x 4 x i32> %vec, <vscale
2019
define void @test_compressstore_nxv2i64(ptr %p, <vscale x 2 x i64> %vec, <vscale x 2 x i1> %mask) {
2120
; CHECK-LABEL: test_compressstore_nxv2i64:
2221
; CHECK: // %bb.0:
23-
; CHECK-NEXT: ptrue p1.d
22+
; CHECK-NEXT: cntp x8, p0, p0.d
2423
; CHECK-NEXT: compact z0.d, p0, z0.d
25-
; CHECK-NEXT: cntp x8, p1, p0.d
2624
; CHECK-NEXT: whilelo p0.d, xzr, x8
2725
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
2826
; CHECK-NEXT: ret
@@ -33,9 +31,8 @@ define void @test_compressstore_nxv2i64(ptr %p, <vscale x 2 x i64> %vec, <vscale
3331
define void @test_compressstore_nxv4f32(ptr %p, <vscale x 4 x float> %vec, <vscale x 4 x i1> %mask) {
3432
; CHECK-LABEL: test_compressstore_nxv4f32:
3533
; CHECK: // %bb.0:
36-
; CHECK-NEXT: ptrue p1.s
34+
; CHECK-NEXT: cntp x8, p0, p0.s
3735
; CHECK-NEXT: compact z0.s, p0, z0.s
38-
; CHECK-NEXT: cntp x8, p1, p0.s
3936
; CHECK-NEXT: whilelo p0.s, xzr, x8
4037
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
4138
; CHECK-NEXT: ret
@@ -46,9 +43,8 @@ define void @test_compressstore_nxv4f32(ptr %p, <vscale x 4 x float> %vec, <vsca
4643
define void @test_compressstore_nxv2f64(ptr %p, <vscale x 2 x double> %vec, <vscale x 2 x i1> %mask) {
4744
; CHECK-LABEL: test_compressstore_nxv2f64:
4845
; CHECK: // %bb.0:
49-
; CHECK-NEXT: ptrue p1.d
46+
; CHECK-NEXT: cntp x8, p0, p0.d
5047
; CHECK-NEXT: compact z0.d, p0, z0.d
51-
; CHECK-NEXT: cntp x8, p1, p0.d
5248
; CHECK-NEXT: whilelo p0.d, xzr, x8
5349
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
5450
; CHECK-NEXT: ret
@@ -61,9 +57,8 @@ define void @test_compressstore_nxv2f64(ptr %p, <vscale x 2 x double> %vec, <vsc
6157
define void @test_compressstore_nxv2f32(ptr %p, <vscale x 2 x float> %vec, <vscale x 2 x i1> %mask) {
6258
; CHECK-LABEL: test_compressstore_nxv2f32:
6359
; CHECK: // %bb.0:
64-
; CHECK-NEXT: ptrue p1.d
60+
; CHECK-NEXT: cntp x8, p0, p0.d
6561
; CHECK-NEXT: compact z0.d, p0, z0.d
66-
; CHECK-NEXT: cntp x8, p1, p0.d
6762
; CHECK-NEXT: whilelo p0.d, xzr, x8
6863
; CHECK-NEXT: st1w { z0.d }, p0, [x0]
6964
; CHECK-NEXT: ret
@@ -76,9 +71,8 @@ define void @test_compressstore_nxv2f32(ptr %p, <vscale x 2 x float> %vec, <vsca
7671
define void @test_compressstore_nxv2i8(ptr %p, <vscale x 2 x i8> %vec, <vscale x 2 x i1> %mask) {
7772
; CHECK-LABEL: test_compressstore_nxv2i8:
7873
; CHECK: // %bb.0:
79-
; CHECK-NEXT: ptrue p1.d
74+
; CHECK-NEXT: cntp x8, p0, p0.d
8075
; CHECK-NEXT: compact z0.d, p0, z0.d
81-
; CHECK-NEXT: cntp x8, p1, p0.d
8276
; CHECK-NEXT: whilelo p0.d, xzr, x8
8377
; CHECK-NEXT: st1b { z0.d }, p0, [x0]
8478
; CHECK-NEXT: ret
@@ -89,9 +83,8 @@ define void @test_compressstore_nxv2i8(ptr %p, <vscale x 2 x i8> %vec, <vscale x
8983
define void @test_compressstore_nxv4i16(ptr %p, <vscale x 4 x i16> %vec, <vscale x 4 x i1> %mask) {
9084
; CHECK-LABEL: test_compressstore_nxv4i16:
9185
; CHECK: // %bb.0:
92-
; CHECK-NEXT: ptrue p1.s
86+
; CHECK-NEXT: cntp x8, p0, p0.s
9387
; CHECK-NEXT: compact z0.s, p0, z0.s
94-
; CHECK-NEXT: cntp x8, p1, p0.s
9588
; CHECK-NEXT: whilelo p0.s, xzr, x8
9689
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
9790
; CHECK-NEXT: ret
@@ -107,10 +100,9 @@ define void @test_compressstore_v2f64(ptr %p, <2 x double> %vec, <2 x i1> %mask)
107100
; CHECK-NEXT: ushll v1.2d, v1.2s, #0
108101
; CHECK-NEXT: ptrue p0.d, vl2
109102
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
110-
; CHECK-NEXT: ptrue p1.d
111103
; CHECK-NEXT: shl v1.2d, v1.2d, #63
112104
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
113-
; CHECK-NEXT: cntp x8, p1, p0.d
105+
; CHECK-NEXT: cntp x8, p0, p0.d
114106
; CHECK-NEXT: compact z0.d, p0, z0.d
115107
; CHECK-NEXT: whilelo p0.d, xzr, x8
116108
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
@@ -125,10 +117,9 @@ define void @test_compressstore_v4i32(ptr %p, <4 x i32> %vec, <4 x i1> %mask) {
125117
; CHECK-NEXT: ushll v1.4s, v1.4h, #0
126118
; CHECK-NEXT: ptrue p0.s, vl4
127119
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
128-
; CHECK-NEXT: ptrue p1.s
129120
; CHECK-NEXT: shl v1.4s, v1.4s, #31
130121
; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
131-
; CHECK-NEXT: cntp x8, p1, p0.s
122+
; CHECK-NEXT: cntp x8, p0, p0.s
132123
; CHECK-NEXT: compact z0.s, p0, z0.s
133124
; CHECK-NEXT: whilelo p0.s, xzr, x8
134125
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
@@ -143,10 +134,9 @@ define void @test_compressstore_v2i64(ptr %p, <2 x i64> %vec, <2 x i1> %mask) {
143134
; CHECK-NEXT: ushll v1.2d, v1.2s, #0
144135
; CHECK-NEXT: ptrue p0.d, vl2
145136
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
146-
; CHECK-NEXT: ptrue p1.d
147137
; CHECK-NEXT: shl v1.2d, v1.2d, #63
148138
; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
149-
; CHECK-NEXT: cntp x8, p1, p0.d
139+
; CHECK-NEXT: cntp x8, p0, p0.d
150140
; CHECK-NEXT: compact z0.d, p0, z0.d
151141
; CHECK-NEXT: whilelo p0.d, xzr, x8
152142
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
@@ -216,12 +206,11 @@ define void @test_compressstore_v8i32(ptr %p, <8 x i32> %vec, <8 x i1> %mask) {
216206
; CHECK-VL256-NEXT: uunpklo z2.h, z2.b
217207
; CHECK-VL256-NEXT: ptrue p1.s, vl4
218208
; CHECK-VL256-NEXT: splice z0.s, p1, z0.s, z1.s
219-
; CHECK-VL256-NEXT: ptrue p1.s
220209
; CHECK-VL256-NEXT: uunpklo z2.s, z2.h
221210
; CHECK-VL256-NEXT: lsl z2.s, z2.s, #31
222211
; CHECK-VL256-NEXT: asr z2.s, z2.s, #31
223212
; CHECK-VL256-NEXT: cmpne p0.s, p0/z, z2.s, #0
224-
; CHECK-VL256-NEXT: cntp x8, p1, p0.s
213+
; CHECK-VL256-NEXT: cntp x8, p0, p0.s
225214
; CHECK-VL256-NEXT: compact z0.s, p0, z0.s
226215
; CHECK-VL256-NEXT: whilelo p0.s, xzr, x8
227216
; CHECK-VL256-NEXT: st1w { z0.s }, p0, [x0]
@@ -271,12 +260,11 @@ define void @test_compressstore_v4i64(ptr %p, <4 x i64> %vec, <4 x i1> %mask) {
271260
; CHECK-VL256-NEXT: uunpklo z2.s, z2.h
272261
; CHECK-VL256-NEXT: ptrue p1.d, vl2
273262
; CHECK-VL256-NEXT: splice z0.d, p1, z0.d, z1.d
274-
; CHECK-VL256-NEXT: ptrue p1.d
275263
; CHECK-VL256-NEXT: uunpklo z2.d, z2.s
276264
; CHECK-VL256-NEXT: lsl z2.d, z2.d, #63
277265
; CHECK-VL256-NEXT: asr z2.d, z2.d, #63
278266
; CHECK-VL256-NEXT: cmpne p0.d, p0/z, z2.d, #0
279-
; CHECK-VL256-NEXT: cntp x8, p1, p0.d
267+
; CHECK-VL256-NEXT: cntp x8, p0, p0.d
280268
; CHECK-VL256-NEXT: compact z0.d, p0, z0.d
281269
; CHECK-VL256-NEXT: whilelo p0.d, xzr, x8
282270
; CHECK-VL256-NEXT: st1d { z0.d }, p0, [x0]

0 commit comments

Comments
 (0)