Skip to content

Commit 45807c8

Browse files
committed
Address review comments
1 parent 8ab4b88 commit 45807c8

File tree

3 files changed

+76
-78
lines changed

3 files changed

+76
-78
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -24683,9 +24683,8 @@ static SDValue getNarrowMaskForInterleavedOps(SelectionDAG &DAG, SDLoc &DL,
2468324683
return SDValue();
2468424684
}
2468524685

24686-
static SDValue
24687-
performStoreInterleaveCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
24688-
SelectionDAG &DAG) {
24686+
static SDValue performInterleavedMaskedStoreCombine(
24687+
SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG) {
2468924688
if (!DCI.isBeforeLegalize())
2469024689
return SDValue();
2469124690

@@ -24708,7 +24707,7 @@ performStoreInterleaveCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
2470824707
if (NumParts != 2 && NumParts != 4)
2470924708
return SDValue();
2471024709

24711-
// At the moment we're unlikely to see a fixed-width vector deinterleave as
24710+
// At the moment we're unlikely to see a fixed-width vector interleave as
2471224711
// we usually generate shuffles instead.
2471324712
EVT SubVecTy = ValueInterleaveOps[0].getValueType();
2471424713
if (!SubVecTy.isScalableVT() ||
@@ -24724,7 +24723,6 @@ performStoreInterleaveCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
2472424723

2472524724
const Intrinsic::ID IID =
2472624725
NumParts == 2 ? Intrinsic::aarch64_sve_st2 : Intrinsic::aarch64_sve_st4;
24727-
SDValue Res;
2472824726
SmallVector<SDValue, 8> NewStOps;
2472924727
NewStOps.append({MST->getChain(), DAG.getConstant(IID, DL, MVT::i32)});
2473024728
NewStOps.append(ValueInterleaveOps);
@@ -24741,7 +24739,7 @@ static SDValue performMSTORECombine(SDNode *N,
2474124739
SDValue Mask = MST->getMask();
2474224740
SDLoc DL(N);
2474324741

24744-
if (SDValue Res = performStoreInterleaveCombine(N, DCI, DAG))
24742+
if (SDValue Res = performInterleavedMaskedStoreCombine(N, DCI, DAG))
2474524743
return Res;
2474624744

2474724745
// If this is a UZP1 followed by a masked store, fold this into a masked

llvm/test/CodeGen/AArch64/fixed_masked_interleaved_stores.ll

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -218,9 +218,9 @@ define void @foo_st2_v16i8(<16 x i1> %mask, <16 x i8> %val1, <16 x i8> %val2, pt
218218
; CHECK-NEXT: mov b0, v0.b[15]
219219
; CHECK-NEXT: stur b0, [x0, #31]
220220
; CHECK-NEXT: ret
221-
%interleaved.mask = tail call <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1> %mask, <16 x i1> %mask)
222-
%strided.vec = tail call <32 x i8> @llvm.vector.interleave2.v32i8(<16 x i8> %val1, <16 x i8> %val2)
223-
tail call void @llvm.masked.store.v32i8.p0(<32 x i8> %strided.vec, ptr %p, i32 1, <32 x i1> %interleaved.mask)
221+
%interleaved.mask = call <32 x i1> @llvm.vector.interleave2.v32i1(<16 x i1> %mask, <16 x i1> %mask)
222+
%strided.vec = call <32 x i8> @llvm.vector.interleave2.v32i8(<16 x i8> %val1, <16 x i8> %val2)
223+
call void @llvm.masked.store.v32i8.p0(<32 x i8> %strided.vec, ptr %p, i32 1, <32 x i1> %interleaved.mask)
224224
ret void
225225
}
226226

@@ -336,9 +336,9 @@ define void @foo_st2_v8i16(<8 x i1> %mask, <8 x i16> %val1, <8 x i16> %val2, ptr
336336
; CHECK-NEXT: mov h0, v0.h[7]
337337
; CHECK-NEXT: str h0, [x0, #30]
338338
; CHECK-NEXT: ret
339-
%interleaved.mask = tail call <16 x i1> @llvm.vector.interleave2.v16i1(<8 x i1> %mask, <8 x i1> %mask)
340-
%strided.vec = tail call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %val1, <8 x i16> %val2)
341-
tail call void @llvm.masked.store.v16i16.p0(<16 x i16> %strided.vec, ptr %p, i32 1, <16 x i1> %interleaved.mask)
339+
%interleaved.mask = call <16 x i1> @llvm.vector.interleave2.v16i1(<8 x i1> %mask, <8 x i1> %mask)
340+
%strided.vec = call <16 x i16> @llvm.vector.interleave2.v16i16(<8 x i16> %val1, <8 x i16> %val2)
341+
call void @llvm.masked.store.v16i16.p0(<16 x i16> %strided.vec, ptr %p, i32 1, <16 x i1> %interleaved.mask)
342342
ret void
343343
}
344344

@@ -404,9 +404,9 @@ define void @foo_st2_v4i32(<4 x i1> %mask, <4 x i32> %val1, <4 x i32> %val2, ptr
404404
; CHECK-NEXT: mov s0, v0.s[3]
405405
; CHECK-NEXT: str s0, [x0, #28]
406406
; CHECK-NEXT: ret
407-
%interleaved.mask = tail call <8 x i1> @llvm.vector.interleave2.v8i1(<4 x i1> %mask, <4 x i1> %mask)
408-
%strided.vec = tail call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %val1, <4 x i32> %val2)
409-
tail call void @llvm.masked.store.v8i32.p0(<8 x i32> %strided.vec, ptr %p, i32 1, <8 x i1> %interleaved.mask)
407+
%interleaved.mask = call <8 x i1> @llvm.vector.interleave2.v8i1(<4 x i1> %mask, <4 x i1> %mask)
408+
%strided.vec = call <8 x i32> @llvm.vector.interleave2.v8i32(<4 x i32> %val1, <4 x i32> %val2)
409+
call void @llvm.masked.store.v8i32.p0(<8 x i32> %strided.vec, ptr %p, i32 1, <8 x i1> %interleaved.mask)
410410
ret void
411411
}
412412

@@ -448,8 +448,8 @@ define void @foo_st2_v2i64(<2 x i1> %mask, <2 x i64> %val1, <2 x i64> %val2, ptr
448448
; CHECK-NEXT: mov d0, v0.d[1]
449449
; CHECK-NEXT: str d0, [x0, #24]
450450
; CHECK-NEXT: ret
451-
%interleaved.mask = tail call <4 x i1> @llvm.vector.interleave2.v4i1(<2 x i1> %mask, <2 x i1> %mask)
452-
%strided.vec = tail call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %val1, <2 x i64> %val2)
453-
tail call void @llvm.masked.store.v4i64.p0(<4 x i64> %strided.vec, ptr %p, i32 1, <4 x i1> %interleaved.mask)
451+
%interleaved.mask = call <4 x i1> @llvm.vector.interleave2.v4i1(<2 x i1> %mask, <2 x i1> %mask)
452+
%strided.vec = call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %val1, <2 x i64> %val2)
453+
call void @llvm.masked.store.v4i64.p0(<4 x i64> %strided.vec, ptr %p, i32 1, <4 x i1> %interleaved.mask)
454454
ret void
455455
}

0 commit comments

Comments
 (0)