Skip to content

Commit a4c1b76

Browse files
committed
Create AArch64TargetLowering::canCombineStoreAndExtract
ST1 instructions extract and store in one instruction. We can map them to 64-bit and 128-bit registers (V and Q regs).
1 parent bc5e5c0 commit a4c1b76

File tree

2 files changed

+35
-0
lines changed

2 files changed

+35
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28282,6 +28282,38 @@ Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2828228282
return TargetLowering::getSSPStackGuardCheck(M);
2828328283
}
2828428284

28285+
/// Decide whether an element extract from \p VectorTy at lane \p Idx can be
/// merged with the store that consumes it.
///
/// \param VectorTy Vector type being extracted from (asserted to be a vector
///                 once the early rejections have passed).
/// \param Idx      Lane index; the combine is only considered when this is a
///                 ConstantInt.
/// \param Cost     Set to 0 when the function returns true; left untouched
///                 otherwise.
/// \returns true only for fixed-size, non-floating-point vectors that are
///          exactly 64 or 128 bits wide and are indexed by a constant.
bool AArch64TargetLowering::canCombineStoreAndExtract(Type *VectorTy,
                                                      Value *Idx,
                                                      unsigned &Cost) const {
  // Two independent early-outs:
  //  * Floating-point scalars live in the same register file as vectors, so a
  //    plain FP store is preferable: it has more addressing-mode freedom than
  //    a store-extract would.
  //  * A lane index that is not known at compile time is very expensive to
  //    lower and cannot be folded into the store at all.
  if (VectorTy->isFPOrFPVectorTy() || !isa<ConstantInt>(Idx))
    return false;

  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");

  // ST1 lane indexing only works on fixed-size NEON vectors, so scalable
  // vectors are rejected before their size is queried as a fixed value.
  const auto *VecTy = cast<VectorType>(VectorTy);
  if (VecTy->isScalableTy())
    return false;

  // Only vectors that exactly fill a 64-bit or a 128-bit vector register
  // qualify for the combined store + extract.
  const unsigned NumBits = VectorTy->getPrimitiveSizeInBits().getFixedValue();
  if (NumBits != 64 && NumBits != 128)
    return false;

  Cost = 0;
  return true;
}
28316+
2828528317
Value *
2828628318
AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
2828728319
// Android provides a fixed TLS slot for the SafeStack pointer. See the

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -354,6 +354,9 @@ class AArch64TargetLowering : public TargetLowering {
354354

355355
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
356356

357+
/// Reports whether an element extract from \p VectorTy at index \p Idx can be
/// combined with the store of the extracted value (the accompanying commit
/// targets ST1, which extracts and stores in one instruction). \p Cost is an
/// output parameter describing the cost of the combined operation.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
358+
unsigned &Cost) const override;
359+
357360
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
358361
const MachineFunction &MF) const override;
359362

0 commit comments

Comments
 (0)