diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 72594c7f9783c..e26d05bcda021 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3262,10 +3262,14 @@ class LLVM_ABI TargetLoweringBase {
   /// Return true on success. Currently only supports
   /// llvm.vector.interleave{2,3,5,7}
   ///
-  /// \p SI is the accompanying store instruction
+  /// \p Store is the accompanying store instruction. Can be either a plain
+  /// store or a vp.store intrinsic.
+  /// \p Mask is a per-segment (i.e. number of lanes equal to that of one
+  /// component being interleaved) mask. Can be nullptr, in which case the
+  /// resulting store is unconditional.
   /// \p InterleaveValues contains the interleaved values.
   virtual bool
-  lowerInterleaveIntrinsicToStore(StoreInst *SI,
+  lowerInterleaveIntrinsicToStore(Instruction *Store, Value *Mask,
                                   ArrayRef<Value *> InterleaveValues) const {
     return false;
   }
diff --git a/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 95599837e1bfc..0c0cabf40b039 100644
--- a/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -681,23 +681,19 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
   const unsigned Factor = getInterleaveIntrinsicFactor(II->getIntrinsicID());
   assert(Factor && "unexpected interleave intrinsic");
 
+  Value *Mask = nullptr;
   if (auto *VPStore = dyn_cast<VPIntrinsic>(StoredBy)) {
     if (VPStore->getIntrinsicID() != Intrinsic::vp_store)
       return false;
 
     Value *WideMask = VPStore->getOperand(2);
-    Value *Mask = getMask(WideMask, Factor,
-                          cast<VectorType>(InterleaveValues[0]->getType()));
+    Mask = getMask(WideMask, Factor,
+                   cast<VectorType>(InterleaveValues[0]->getType()));
     if (!Mask)
       return false;
 
     LLVM_DEBUG(dbgs() << "IA: Found a vp.store with interleave intrinsic "
                       << *II << " and factor = " << Factor << "\n");
-
-    // Since lowerInterleavedStore expects Shuffle and StoreInst, use special
-    // TLI function to emit target-specific interleaved instruction.
-    if (!TLI->lowerInterleavedVPStore(VPStore, Mask, InterleaveValues))
-      return false;
   } else {
     auto *SI = cast<StoreInst>(StoredBy);
     if (!SI->isSimple())
@@ -705,12 +701,13 @@ bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
 
     LLVM_DEBUG(dbgs() << "IA: Found a store with interleave intrinsic "
                       << *II << " and factor = " << Factor << "\n");
-
-    // Try and match this with target specific intrinsics.
-    if (!TLI->lowerInterleaveIntrinsicToStore(SI, InterleaveValues))
-      return false;
   }
 
+  // Try and match this with target specific intrinsics.
+  if (!TLI->lowerInterleaveIntrinsicToStore(cast<Instruction>(StoredBy), Mask,
+                                            InterleaveValues))
+    return false;
+
   // We now have a target-specific store, so delete the old one.
   DeadInsts.insert(cast<Instruction>(StoredBy));
   DeadInsts.insert(II);
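Editor's note (not part of the patch): with both branches funneling into the single `lowerInterleaveIntrinsicToStore` hook, `Mask == nullptr` now encodes the plain-store case. A minimal sketch of the kind of input the vp.store path matches — illustrative IR, not taken from the patch's tests; `getMask` can recover the per-segment `%m` here because the wide mask is an interleave of identical values:

```llvm
declare <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(<vscale x 4 x i1>, <vscale x 4 x i1>)
declare void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32>, ptr, <vscale x 8 x i1>, i32)

define void @store_factor2_vp(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b,
                              <vscale x 4 x i1> %m, ptr %p, i32 %evl) {
  ; Two segments woven into one wide value by the interleave intrinsic.
  %v = call <vscale x 8 x i32> @llvm.vector.interleave2.nxv8i32(
      <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
  ; Wide mask built by interleaving the per-segment mask with itself.
  %wm = call <vscale x 8 x i1> @llvm.vector.interleave2.nxv8i1(
      <vscale x 4 x i1> %m, <vscale x 4 x i1> %m)
  call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %v, ptr %p,
                                      <vscale x 8 x i1> %wm, i32 %evl)
  ret void
}
```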
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 235df9022c6fb..81c5263212dd6 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -17561,12 +17561,17 @@ bool AArch64TargetLowering::lowerDeinterleaveIntrinsicToLoad(
 }
 
 bool AArch64TargetLowering::lowerInterleaveIntrinsicToStore(
-    StoreInst *SI, ArrayRef<Value *> InterleavedValues) const {
+    Instruction *Store, Value *Mask,
+    ArrayRef<Value *> InterleavedValues) const {
   unsigned Factor = InterleavedValues.size();
   if (Factor != 2 && Factor != 4) {
     LLVM_DEBUG(dbgs() << "Matching st2 and st4 patterns failed\n");
     return false;
   }
+  StoreInst *SI = dyn_cast<StoreInst>(Store);
+  if (!SI)
+    return false;
+  assert(!Mask && "Unexpected mask on plain store");
 
   VectorType *VTy = cast<VectorType>(InterleavedValues[0]->getType());
   const DataLayout &DL = SI->getModule()->getDataLayout();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6afb3c330d25b..b4671bb6bddf1 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -223,7 +223,8 @@ class AArch64TargetLowering : public TargetLowering {
       ArrayRef<Value *> DeinterleaveValues) const override;
 
   bool lowerInterleaveIntrinsicToStore(
-      StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
+      Instruction *Store, Value *Mask,
+      ArrayRef<Value *> InterleaveValues) const override;
 
   bool isLegalAddImmediate(int64_t) const override;
   bool isLegalAddScalableImmediate(int64_t) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 41bbf6b9dcf2e..61ed23e70c3fd 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -442,7 +442,8 @@ class RISCVTargetLowering : public TargetLowering {
       ArrayRef<Value *> DeinterleaveValues) const override;
 
   bool lowerInterleaveIntrinsicToStore(
-      StoreInst *SI, ArrayRef<Value *> InterleaveValues) const override;
+      Instruction *Store, Value *Mask,
+      ArrayRef<Value *> InterleaveValues) const override;
 
   bool lowerInterleavedVPLoad(VPIntrinsic *Load, Value *Mask,
                               ArrayRef<Value *> DeinterleaveRes) const override;
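Editor's note: the AArch64 side stays conservative under the new signature — anything that is not a plain StoreInst makes the hook return false (and the assert documents that plain stores arrive unmasked), while the existing patterns keep lowering to st2/st4. An illustrative case, not from the patch's tests:

```llvm
declare <32 x i8> @llvm.vector.interleave2.v32i8(<16 x i8>, <16 x i8>)

define void @store_factor2(<16 x i8> %a, <16 x i8> %b, ptr %p) {
  ; Unmasked factor-2 interleave feeding a simple store: still matched here,
  ; lowering to a single st2 of the two registers.
  %v = call <32 x i8> @llvm.vector.interleave2.v32i8(<16 x i8> %a, <16 x i8> %b)
  store <32 x i8> %v, ptr %p
  ret void
}
```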
store"); + Mask = Builder.getAllOnesMask(InVTy->getElementCount()); + VL = isa(InVTy) + ? Builder.CreateElementCount(XLenTy, InVTy->getElementCount()) + : Constant::getAllOnesValue(XLenTy); + } else { + auto *VPStore = cast(Store); + assert(VPStore->getIntrinsicID() == Intrinsic::vp_store && + "Unexpected intrinsic"); + Ptr = VPStore->getMemoryPointerParam(); + Alignment = VPStore->getPointerAlignment().value_or( + DL.getABITypeAlign(InVTy->getElementType())); + + assert(Mask && "vp.store needs a mask!"); + + Value *WideEVL = VPStore->getVectorLengthParam(); + // Conservatively check if EVL is a multiple of factor, otherwise some + // (trailing) elements might be lost after the transformation. + if (!isMultipleOfN(WideEVL, DL, Factor)) + return false; - Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); + VL = Builder.CreateZExt( + Builder.CreateUDiv(WideEVL, + ConstantInt::get(WideEVL->getType(), Factor)), + XLenTy); + } + Type *PtrTy = Ptr->getType(); + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + if (!isLegalInterleavedAccessType(InVTy, Factor, Alignment, AS, DL)) + return false; if (isa(InVTy)) { Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - SI->getModule(), FixedVssegIntrIds[Factor - 2], {InVTy, PtrTy, XLenTy}); - + Store->getModule(), FixedVssegIntrIds[Factor - 2], + {InVTy, PtrTy, XLenTy}); SmallVector Ops(InterleaveValues); - Value *VL = Builder.CreateElementCount(XLenTy, InVTy->getElementCount()); - Value *Mask = Builder.getAllOnesMask(InVTy->getElementCount()); - Ops.append({SI->getPointerOperand(), Mask, VL}); - + Ops.append({Ptr, Mask, VL}); Builder.CreateCall(VssegNFunc, Ops); return true; } unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType()); unsigned NumElts = InVTy->getElementCount().getKnownMinValue(); Type *VecTupTy = TargetExtType::get( - SI->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(SI->getContext()), + Store->getContext(), "riscv.vector.tuple", + ScalableVectorType::get(Type::getInt8Ty(Store->getContext()), NumElts * SEW / 8), Factor); - Value *VL = Constant::getAllOnesValue(XLenTy); - Value *Mask = Builder.getAllOnesMask(InVTy->getElementCount()); - Value *StoredVal = PoisonValue::get(VecTupTy); for (unsigned i = 0; i < Factor; ++i) StoredVal = Builder.CreateIntrinsic( @@ -408,10 +432,10 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( {StoredVal, InterleaveValues[i], Builder.getInt32(i)}); Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - SI->getModule(), ScalableVssegIntrIds[Factor - 2], + Store->getModule(), ScalableVssegIntrIds[Factor - 2], {VecTupTy, PtrTy, Mask->getType(), VL->getType()}); - Value *Operands[] = {StoredVal, SI->getPointerOperand(), Mask, VL, + Value *Operands[] = {StoredVal, Ptr, Mask, VL, ConstantInt::get(XLenTy, Log2_64(SEW))}; Builder.CreateCall(VssegNFunc, Operands); return true;