Commit 3fde69d

[IA] Use a single callback for lowerInterleaveIntrinsic [nfc] (#148978)
This continues in the direction started by commit 4b81dc7. We essentially merge the handling for VPStore - currently in lowerInterleavedVPStore, which is shared between shuffle- and intrinsic-based interleaves - into the existing dedicated routine.
1 parent 4b81dc7 commit 3fde69d

6 files changed, +70 -38 lines
llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 6 additions & 2 deletions
@@ -3262,10 +3262,14 @@ class LLVM_ABI TargetLoweringBase {
   /// Return true on success. Currently only supports
   /// llvm.vector.interleave{2,3,5,7}
   ///
-  /// \p SI is the accompanying store instruction
+  /// \p Store is the accompanying store instruction. Can be either a plain
+  /// store or a vp.store intrinsic.
+  /// \p Mask is a per-segment (i.e. number of lanes equal to that of one
+  /// component being interwoven) mask. Can be nullptr, in which case the
+  /// result is unconditional.
   /// \p InterleaveValues contains the interleaved values.
   virtual bool
-  lowerInterleaveIntrinsicToStore(StoreInst *SI,
+  lowerInterleaveIntrinsicToStore(Instruction *Store, Value *Mask,
                                   ArrayRef<Value *> InterleaveValues) const {
     return false;
   }

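To make the documented contract concrete, here is a minimal sketch (not part of this commit; MyTargetLowering is a placeholder class name) of how a target override can consume the new parameters: a plain StoreInst arrives with a null Mask, while a vp.store arrives with the per-segment mask already extracted by the pass.

// Hypothetical override honoring the lowerInterleaveIntrinsicToStore contract
// documented above. MyTargetLowering is illustrative, not in-tree code.
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

bool MyTargetLowering::lowerInterleaveIntrinsicToStore(
    Instruction *Store, Value *Mask, ArrayRef<Value *> InterleaveValues) const {
  if (auto *SI = dyn_cast<StoreInst>(Store)) {
    // Plain store: no mask is supplied, so the emitted store is unconditional.
    assert(!Mask && "unexpected mask on plain store");
    (void)SI; // ... emit an unmasked segmented store from SI->getPointerOperand() ...
  } else {
    // vp.store: Mask has one lane per element of a single interleaved
    // component, i.e. the type of InterleaveValues[0].
    auto *VPStore = cast<VPIntrinsic>(Store);
    assert(Mask && VPStore->getIntrinsicID() == Intrinsic::vp_store &&
           "expected a masked vp.store");
    (void)VPStore; // ... emit a masked store using the pointer and EVL params ...
  }
  return false; // Sketch only: report "not lowered" so the original IR is kept.
}

Of the in-tree overrides updated below, AArch64 simply rejects the vp.store form for now, while RISC-V handles both forms in a single routine.
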
llvm/lib/CodeGen/InterleavedAccessPass.cpp

Lines changed: 8 additions & 11 deletions
@@ -681,36 +681,33 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
   const unsigned Factor = getInterleaveIntrinsicFactor(II->getIntrinsicID());
   assert(Factor && "unexpected interleave intrinsic");
 
+  Value *Mask = nullptr;
   if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
     if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
       return false;
 
     Value *WideMask = VPStore->getOperand(2);
-    Value *Mask = getMask(WideMask, Factor,
-                          cast<VectorType>(InterleaveValues[0]->getType()));
+    Mask = getMask(WideMask, Factor,
+                   cast<VectorType>(InterleaveValues[0]->getType()));
     if (!Mask)
       return false;
 
     LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
                       << *II << " and factor = " << Factor << "\n");
-
-    // Since lowerInterleavedStore expects Shuffle and StoreInst, use special
-    // TLI function to emit target-specific interleaved instruction.
-    if (!TLI->lowerInterleavedVPStore(VPStore, Mask, InterleaveValues))
-      return false;
   } else {
     auto *SI = cast<StoreInst>(StoredBy);
     if (!SI->isSimple())
       return false;
 
     LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic " << *II
                       << " and factor = " << Factor << "\n");
-
-    // Try and match this with target specific intrinsics.
-    if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
-      return false;
   }
 
+  // Try and match this with target specific intrinsics.
+  if (!TLI->lowerInterleaveIntrinsicToStore(cast<Instruction>(StoredBy), Mask,
+                                            InterleaveValues))
+    return false;
+
   // We now have a target-specific store, so delete the old one.
   DeadInsts.insert(cast<Instruction>(StoredBy));
   DeadInsts.insert(II);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 1 deletion
@@ -17561,12 +17561,17 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
 }
 
 bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
-    StoreInst *SI, ArrayRef<Value *> InterleavedValues) const {
+    Instruction *Store, Value *Mask,
+    ArrayRef<Value *> InterleavedValues) const {
   unsigned Factor = InterleavedValues.size();
   if (Factor != 2 && Factor != 4) {
     LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
     return false;
   }
+  StoreInst *SI = dyn_cast<StoreInst>(Store);
+  if (!SI)
+    return false;
+  assert(!Mask && "Unexpected mask on plain store");
 
   VectorType *VTy = cast<VectorType>(InterleavedValues[0]->getType());
   const DataLayout &DL = SI->getModule()->getDataLayout();

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 1 deletion
@@ -223,7 +223,8 @@ class AArch64TargetLowering : public TargetLowering {
       ArrayRef<Value *> DeinterleaveValues) const override;
 
   bool lowerInterleaveIntrinsicToStore(
-      StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
+      Instruction *Store, Value *Mask,
+      ArrayRef<Value *> InterleaveValues) const override;
 
   bool isLegalAddImmediate(int64_t) const override;
   bool isLegalAddScalableImmediate(int64_t) const override;

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 2 additions & 1 deletion
@@ -442,7 +442,8 @@ class RISCVTargetLowering : public TargetLowering {
       ArrayRef<Value *> DeinterleaveValues) const override;
 
   bool lowerInterleaveIntrinsicToStore(
-      StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
+      Instruction *Store, Value *Mask,
+      ArrayRef<Value *> InterleaveValues) const override;
 
   bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
                               ArrayRef<Value *> DeinterleaveRes) const override;

llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp

Lines changed: 46 additions & 22 deletions
@@ -360,58 +360,82 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
 }
 
 bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
-    StoreInst *SI, ArrayRef<Value *> InterleaveValues) const {
+    Instruction *Store, Value *Mask, ArrayRef<Value *> InterleaveValues) const {
   unsigned Factor = InterleaveValues.size();
   if (Factor > 8)
     return false;
 
-  assert(SI->isSimple());
-  IRBuilder<> Builder(SI);
+  IRBuilder<> Builder(Store);
 
   auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
-  auto *PtrTy = SI->getPointerOperandType();
-  const DataLayout &DL = SI->getDataLayout();
+  const DataLayout &DL = Store->getDataLayout();
+  Type *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
 
-  if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
-                                    SI->getPointerAddressSpace(), DL))
-    return false;
+  Value *Ptr, *VL;
+  Align Alignment;
+  if (auto *SI = dyn_cast<StoreInst>(Store)) {
+    assert(SI->isSimple());
+    Ptr = SI->getPointerOperand();
+    Alignment = SI->getAlign();
+    assert(!Mask && "Unexpected mask on a store");
+    Mask = Builder.getAllOnesMask(InVTy->getElementCount());
+    VL = isa<FixedVectorType>(InVTy)
+             ? Builder.CreateElementCount(XLenTy, InVTy->getElementCount())
+             : Constant::getAllOnesValue(XLenTy);
+  } else {
+    auto *VPStore = cast<VPIntrinsic>(Store);
+    assert(VPStore->getIntrinsicID() == Intrinsic::vp_store &&
+           "Unexpected intrinsic");
+    Ptr = VPStore->getMemoryPointerParam();
+    Alignment = VPStore->getPointerAlignment().value_or(
+        DL.getABITypeAlign(InVTy->getElementType()));
+
+    assert(Mask && "vp.store needs a mask!");
+
+    Value *WideEVL = VPStore->getVectorLengthParam();
+    // Conservatively check if EVL is a multiple of factor, otherwise some
+    // (trailing) elements might be lost after the transformation.
+    if (!isMultipleOfN(WideEVL, DL, Factor))
+      return false;
 
-  Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
+    VL = Builder.CreateZExt(
+        Builder.CreateUDiv(WideEVL,
+                           ConstantInt::get(WideEVL->getType(), Factor)),
+        XLenTy);
+  }
+  Type *PtrTy = Ptr->getType();
+  unsigned AS = Ptr->getType()->getPointerAddressSpace();
+  if (!isLegalInterleavedAccessType(InVTy, Factor, Alignment, AS, DL))
+    return false;
 
   if (isa<FixedVectorType>(InVTy)) {
     Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-        SI->getModule(), FixedVssegIntrIds[Factor - 2], {InVTy, PtrTy, XLenTy});
-
+        Store->getModule(), FixedVssegIntrIds[Factor - 2],
+        {InVTy, PtrTy, XLenTy});
     SmallVector<Value *, 10> Ops(InterleaveValues);
-    Value *VL = Builder.CreateElementCount(XLenTy, InVTy->getElementCount());
-    Value *Mask = Builder.getAllOnesMask(InVTy->getElementCount());
-    Ops.append({SI->getPointerOperand(), Mask, VL});
-
+    Ops.append({Ptr, Mask, VL});
     Builder.CreateCall(VssegNFunc, Ops);
     return true;
   }
   unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
   unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
   Type *VecTupTy = TargetExtType::get(
-      SI->getContext(), "riscv.vector.tuple",
-      ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
+      Store->getContext(), "riscv.vector.tuple",
+      ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
                               NumElts * SEW / 8),
       Factor);
 
-  Value *VL = Constant::getAllOnesValue(XLenTy);
-  Value *Mask = Builder.getAllOnesMask(InVTy->getElementCount());
-
   Value *StoredVal = PoisonValue::get(VecTupTy);
   for (unsigned i = 0; i < Factor; ++i)
     StoredVal = Builder.CreateIntrinsic(
        Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
        {StoredVal, InterleaveValues[i], Builder.getInt32(i)});
 
   Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
-      SI->getModule(), ScalableVssegIntrIds[Factor - 2],
+      Store->getModule(), ScalableVssegIntrIds[Factor - 2],
      {VecTupTy, PtrTy, Mask->getType(), VL->getType()});
 
-  Value *Operands[] = {StoredVal, SI->getPointerOperand(), Mask, VL,
+  Value *Operands[] = {StoredVal, Ptr, Mask, VL,
                        ConstantInt::get(XLenTy, Log2_64(SEW))};
   Builder.CreateCall(VssegNFunc, Operands);
   return true;

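As a concrete illustration of the EVL guard in the vp.store path above: with Factor = 2 and an EVL of 7, the derived per-segment VL would be 7 / 2 = 3, so the segmented store would cover only 3 * 2 = 6 elements and drop the seventh active lane. The isMultipleOfN check rejects that case up front, so the transformation simply does not fire rather than losing trailing elements.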