From a4c1b76c90a37c5d1b893cd2514c748ac0cfa23c Mon Sep 17 00:00:00 2001
From: Rose
Date: Mon, 23 Jun 2025 19:36:43 -0400
Subject: [PATCH] Create AArch64TargetLowering::canCombineStoreAndExtract

ST1 instructions extract and store in one instruction.

We can map them to 64-bit and 128-bit registers (D and Q regs).
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 32 +++++++++++++++++++
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |  3 ++
 2 files changed, 35 insertions(+)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1f98d69edb473..4c1e9ca1a09c9 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -28282,6 +28282,38 @@ Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
   return TargetLowering::getSSPStackGuardCheck(M);
 }
 
+bool AArch64TargetLowering::canCombineStoreAndExtract(Type *VectorTy,
+                                                      Value *Idx,
+                                                      unsigned &Cost) const {
+  // Floating point values and vector values map to the same register file.
+  // Therefore, although we could do a store extract of a vector type, this is
+  // better to leave at float as we have more freedom in the addressing mode for
+  // those.
+  if (VectorTy->isFPOrFPVectorTy())
+    return false;
+
+  // If the index is unknown at compile time, this is very expensive to lower
+  // and it is not possible to combine the store with the extract.
+  if (!isa<ConstantInt>(Idx))
+    return false;
+
+  assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
+
+  // Reject scalable vectors: ST1 lane indexing only works with fixed-size NEON
+  // vectors, and getFixedValue() below is only valid for fixed sizes.
+  if (cast<VectorType>(VectorTy)->isScalableTy())
+    return false;
+
+  unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
+  // We can do a store + vector extract on any vector that fits perfectly in a D
+  // or Q register. The combined form is reported as free (Cost = 0).
+  if (BitWidth == 64 || BitWidth == 128) {
+    Cost = 0;
+    return true;
+  }
+  return false;
+}
+
 Value *
 AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
   // Android provides a fixed TLS slot for the SafeStack pointer. See the
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 89f90ee2b7707..8f7e9c53a5e5e 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -354,6 +354,9 @@ class AArch64TargetLowering : public TargetLowering {
 
   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
 
+  bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
+                                 unsigned &Cost) const override;
+
   bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                         const MachineFunction &MF) const override;
 