Skip to content

Commit acaa925

Browse files
authored
[IA][RISCV] Recognize interleaving stores that could lower to strided segmented stores (#154647)
This is a sibling patch to #151612: it passes gap masks to the updated TLI hooks for lowering interleaved stores that use shufflevector to do the interleaving.
1 parent 8b544f3 commit acaa925

File tree

11 files changed

+151
-36
lines changed

11 files changed

+151
-36
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3229,9 +3229,11 @@ class LLVM_ABI TargetLoweringBase {
32293229
/// result is unconditional.
32303230
/// \p SVI is the shufflevector to RE-interleave the stored vector.
32313231
/// \p Factor is the interleave factor.
3232+
/// \p GapMask is a mask with zeros for components / fields that may not be
3233+
/// accessed.
32323234
virtual bool lowerInterleavedStore(Instruction *Store, Value *Mask,
3233-
ShuffleVectorInst *SVI,
3234-
unsigned Factor) const {
3235+
ShuffleVectorInst *SVI, unsigned Factor,
3236+
const APInt &GapMask) const {
32353237
return false;
32363238
}
32373239

llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -537,28 +537,26 @@ bool InterleavedAccessImpl::lowerInterleavedStore(
537537
"number of stored element should be a multiple of Factor");
538538

539539
Value *Mask = nullptr;
540+
auto GapMask = APInt::getAllOnes(Factor);
540541
if (SI) {
541542
LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *Store << "\n");
542543
} else {
543544
// Check mask operand. Handle both all-true/false and interleaved mask.
544545
unsigned LaneMaskLen = NumStoredElements / Factor;
545-
APInt GapMask(Factor, 0);
546546
std::tie(Mask, GapMask) = getMask(getMaskOperand(II), Factor,
547547
ElementCount::getFixed(LaneMaskLen));
548548
if (!Mask)
549549
return false;
550-
// We haven't supported gap mask for stores. Yet it is possible that we
551-
// already changed the IR, hence returning true here.
552-
if (GapMask.popcount() != Factor)
553-
return true;
554550

555551
LLVM_DEBUG(dbgs() << "IA: Found an interleaved vp.store or masked.store: "
556552
<< *Store << "\n");
553+
LLVM_DEBUG(dbgs() << "IA: With nominal factor " << Factor
554+
<< " and actual factor " << GapMask.popcount() << "\n");
557555
}
558556

559557
// Try to create target specific intrinsics to replace the store and
560558
// shuffle.
561-
if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor))
559+
if (!TLI->lowerInterleavedStore(Store, Mask, SVI, Factor, GapMask))
562560
return false;
563561

564562
// Already have a new target specific interleaved store. Erase the old store.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17625,14 +17625,16 @@ bool hasNearbyPairedStore(Iter It, Iter End, Value *Ptr, const DataLayout &DL) {
1762517625
bool AArch64TargetLowering::lowerInterleavedStore(Instruction *Store,
1762617626
Value *LaneMask,
1762717627
ShuffleVectorInst *SVI,
17628-
unsigned Factor) const {
17628+
unsigned Factor,
17629+
const APInt &GapMask) const {
1762917630

1763017631
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
1763117632
"Invalid interleave factor");
1763217633
auto *SI = dyn_cast<StoreInst>(Store);
1763317634
if (!SI)
1763417635
return false;
17635-
assert(!LaneMask && "Unexpected mask on store");
17636+
assert(!LaneMask && GapMask.popcount() == Factor &&
17637+
"Unexpected mask on store");
1763617638

1763717639
auto *VecTy = cast<FixedVectorType>(SVI->getType());
1763817640
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ class AArch64TargetLowering : public TargetLowering {
233233
ArrayRef<unsigned> Indices, unsigned Factor,
234234
const APInt &GapMask) const override;
235235
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
236-
ShuffleVectorInst *SVI,
237-
unsigned Factor) const override;
236+
ShuffleVectorInst *SVI, unsigned Factor,
237+
const APInt &GapMask) const override;
238238

239239
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
240240
IntrinsicInst *DI) const override;

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21706,13 +21706,15 @@ bool ARMTargetLowering::lowerInterleavedLoad(
2170621706
bool ARMTargetLowering::lowerInterleavedStore(Instruction *Store,
2170721707
Value *LaneMask,
2170821708
ShuffleVectorInst *SVI,
21709-
unsigned Factor) const {
21709+
unsigned Factor,
21710+
const APInt &GapMask) const {
2171021711
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
2171121712
"Invalid interleave factor");
2171221713
auto *SI = dyn_cast<StoreInst>(Store);
2171321714
if (!SI)
2171421715
return false;
21715-
assert(!LaneMask && "Unexpected mask on store");
21716+
assert(!LaneMask && GapMask.popcount() == Factor &&
21717+
"Unexpected mask on store");
2171621718

2171721719
auto *VecTy = cast<FixedVectorType>(SVI->getType());
2171821720
assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -688,8 +688,8 @@ class VectorType;
688688
ArrayRef<unsigned> Indices, unsigned Factor,
689689
const APInt &GapMask) const override;
690690
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
691-
ShuffleVectorInst *SVI,
692-
unsigned Factor) const override;
691+
ShuffleVectorInst *SVI, unsigned Factor,
692+
const APInt &GapMask) const override;
693693

694694
bool shouldInsertFencesForAtomic(const Instruction *I) const override;
695695
TargetLoweringBase::AtomicExpansionKind

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -435,8 +435,8 @@ class RISCVTargetLowering : public TargetLowering {
435435
const APInt &GapMask) const override;
436436

437437
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
438-
ShuffleVectorInst *SVI,
439-
unsigned Factor) const override;
438+
ShuffleVectorInst *SVI, unsigned Factor,
439+
const APInt &GapMask) const override;
440440

441441
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
442442
IntrinsicInst *DI) const override;

llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ static const Intrinsic::ID FixedVssegIntrIds[] = {
8181
Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
8282
Intrinsic::riscv_seg8_store_mask};
8383

84+
static const Intrinsic::ID FixedVsssegIntrIds[] = {
85+
Intrinsic::riscv_sseg2_store_mask, Intrinsic::riscv_sseg3_store_mask,
86+
Intrinsic::riscv_sseg4_store_mask, Intrinsic::riscv_sseg5_store_mask,
87+
Intrinsic::riscv_sseg6_store_mask, Intrinsic::riscv_sseg7_store_mask,
88+
Intrinsic::riscv_sseg8_store_mask};
89+
8490
static const Intrinsic::ID ScalableVssegIntrIds[] = {
8591
Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
8692
Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
@@ -275,7 +281,16 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
275281
bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
276282
Value *LaneMask,
277283
ShuffleVectorInst *SVI,
278-
unsigned Factor) const {
284+
unsigned Factor,
285+
const APInt &GapMask) const {
286+
assert(GapMask.getBitWidth() == Factor);
287+
288+
// We only support cases where the skipped fields are the trailing ones.
289+
// TODO: Lower to strided store if there is only a single active field.
290+
unsigned MaskFactor = GapMask.popcount();
291+
if (MaskFactor < 2 || !GapMask.isMask())
292+
return false;
293+
279294
IRBuilder<> Builder(Store);
280295
const DataLayout &DL = Store->getDataLayout();
281296
auto Mask = SVI->getShuffleMask();
@@ -287,21 +302,31 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
287302

288303
Value *Ptr, *VL;
289304
Align Alignment;
290-
if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
305+
if (!getMemOperands(MaskFactor, VTy, XLenTy, Store, Ptr, LaneMask, VL,
306+
Alignment))
291307
return false;
292308

293309
Type *PtrTy = Ptr->getType();
294310
unsigned AS = PtrTy->getPointerAddressSpace();
295-
if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
311+
if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL))
296312
return false;
297313

298-
Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
299-
Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
314+
Function *SegStoreFunc;
315+
if (MaskFactor < Factor)
316+
// Strided segmented store.
317+
SegStoreFunc = Intrinsic::getOrInsertDeclaration(
318+
Store->getModule(), FixedVsssegIntrIds[MaskFactor - 2],
319+
{VTy, PtrTy, XLenTy, XLenTy});
320+
else
321+
// Normal segmented store.
322+
SegStoreFunc = Intrinsic::getOrInsertDeclaration(
323+
Store->getModule(), FixedVssegIntrIds[Factor - 2],
324+
{VTy, PtrTy, XLenTy});
300325

301326
SmallVector<Value *, 10> Ops;
302327
SmallVector<int, 16> NewShuffleMask;
303328

304-
for (unsigned i = 0; i < Factor; i++) {
329+
for (unsigned i = 0; i < MaskFactor; i++) {
305330
// Collect shuffle mask for this lane.
306331
for (unsigned j = 0; j < VTy->getNumElements(); j++)
307332
NewShuffleMask.push_back(Mask[i + Factor * j]);
@@ -312,8 +337,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
312337

313338
NewShuffleMask.clear();
314339
}
315-
Ops.append({Ptr, LaneMask, VL});
316-
Builder.CreateCall(VssegNFunc, Ops);
340+
Ops.push_back(Ptr);
341+
if (MaskFactor < Factor) {
342+
// Insert the stride argument.
343+
unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
344+
Ops.push_back(ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes));
345+
}
346+
Ops.append({LaneMask, VL});
347+
Builder.CreateCall(SegStoreFunc, Ops);
317348

318349
return true;
319350
}

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,8 +1668,8 @@ namespace llvm {
16681668
/// Lower interleaved store(s) into target specific
16691669
/// instructions/intrinsics.
16701670
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
1671-
ShuffleVectorInst *SVI,
1672-
unsigned Factor) const override;
1671+
ShuffleVectorInst *SVI, unsigned Factor,
1672+
const APInt &GapMask) const override;
16731673

16741674
SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
16751675
int JTI, SelectionDAG &DAG) const override;

llvm/lib/Target/X86/X86InterleavedAccess.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -825,7 +825,8 @@ bool X86TargetLowering::lowerInterleavedLoad(
825825
bool X86TargetLowering::lowerInterleavedStore(Instruction *Store,
826826
Value *LaneMask,
827827
ShuffleVectorInst *SVI,
828-
unsigned Factor) const {
828+
unsigned Factor,
829+
const APInt &GapMask) const {
829830
assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
830831
"Invalid interleave factor");
831832

@@ -836,7 +837,8 @@ bool X86TargetLowering::lowerInterleavedStore(Instruction *Store,
836837
auto *SI = dyn_cast<StoreInst>(Store);
837838
if (!SI)
838839
return false;
839-
assert(!LaneMask && "Unexpected mask on store");
840+
assert(!LaneMask && GapMask.popcount() == Factor &&
841+
"Unexpected mask on store");
840842

841843
// Holds the indices of SVI that correspond to the starting index of each
842844
// interleaved shuffle.

0 commit comments

Comments
 (0)