diff --git a/llvm/include/llvm/Analysis/Loads.h b/llvm/include/llvm/Analysis/Loads.h index 84564563de8e3..7f28afafb3500 100644 --- a/llvm/include/llvm/Analysis/Loads.h +++ b/llvm/include/llvm/Analysis/Loads.h @@ -86,9 +86,11 @@ LLVM_ABI bool isDereferenceableAndAlignedInLoop( SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr); -/// Return true if the loop \p L cannot fault on any iteration and only -/// contains read-only memory accesses. -LLVM_ABI bool isDereferenceableReadOnlyLoop( +/// Return true if the loop \p L only contains read-only memory accesses and +/// each load is either guaranteed dereferenceable on every iteration or is +/// collected into \p SpeculativeLoads (such loads may still fault). +LLVM_ABI bool isReadOnlyLoopWithSafeOrSpeculativeLoads( Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + SmallVectorImpl<LoadInst *> *SpeculativeLoads, SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr); /// Return true if we know that executing a load from this value cannot trap. diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index aa4550de455e0..0fcc5d1b3fb98 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -843,6 +843,9 @@ class TargetTransformInfo { /// Return true if the target supports strided load. LLVM_ABI bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const; + /// Return true if the target supports speculative load. + LLVM_ABI bool isLegalSpeculativeLoad(Type *DataType, Align Alignment) const; + /// Return true is the target supports interleaved access for the given vector /// type \p VTy, interleave factor \p Factor, alignment \p Alignment and /// address space \p AddrSpace. 
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h index abdbca04488db..6aca7e1412271 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h @@ -374,6 +374,10 @@ class TargetTransformInfoImplBase { return false; } + virtual bool isLegalSpeculativeLoad(Type *DataType, Align Alignment) const { + return false; + } + virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const { diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 43ff084816d18..3b5638f3f570a 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -445,6 +445,11 @@ class LoopVectorizationLegality { /// Returns a list of all known histogram operations in the loop. bool hasHistograms() const { return !Histograms.empty(); } + /// Returns the loads that may fault and need to be speculative. + const SmallPtrSetImpl<LoadInst *> &getSpeculativeLoads() const { + return SpeculativeLoads; + } + PredicatedScalarEvolution *getPredicatedScalarEvolution() const { return &PSE; } @@ -630,6 +635,9 @@ class LoopVectorizationLegality { /// may work on the same memory location. SmallVector<HistogramInfo, 1> Histograms; + /// Hold all loads that need to be speculative. + SmallPtrSet<LoadInst *, 4> SpeculativeLoads; + /// BFI and PSI are used to check for profile guided size optimizations. 
BlockFrequencyInfo *BFI; ProfileSummaryInfo *PSI; diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp index 78d0887d5d87e..c39587131dac0 100644 --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -856,16 +856,19 @@ bool llvm::canReplacePointersIfEqual(const Value *From, const Value *To, return isPointerAlwaysReplaceable(From, To, DL); } -bool llvm::isDereferenceableReadOnlyLoop( +bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads( Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC, + SmallVectorImpl<LoadInst *> *SpeculativeLoads, SmallVectorImpl<const SCEVPredicate *> *Predicates) { for (BasicBlock *BB : L->blocks()) { for (Instruction &I : *BB) { if (auto *LI = dyn_cast<LoadInst>(&I)) { if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates)) - return false; - } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow()) + SpeculativeLoads->push_back(LI); + } else if (I.mayReadFromMemory() || I.mayWriteToMemory() || + I.mayThrow()) { return false; + } } } return true; } diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp index c7eb2ec18c679..3e94c816488fd 100644 --- a/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -531,6 +531,11 @@ bool TargetTransformInfo::isLegalStridedLoadStore(Type *DataType, return TTIImpl->isLegalStridedLoadStore(DataType, Alignment); } +bool TargetTransformInfo::isLegalSpeculativeLoad(Type *DataType, + Align Alignment) const { + return TTIImpl->isLegalSpeculativeLoad(DataType, Alignment); +} + bool TargetTransformInfo::isLegalInterleavedAccessType( VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const { diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 03e54b3d395e3..192435d134e0f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -24405,6 +24405,18 @@ bool 
RISCVTargetLowering::isLegalStridedLoadStore(EVT DataType, return true; } +bool RISCVTargetLowering::isLegalSpeculativeLoad(EVT DataType, + Align Alignment) const { + if (!Subtarget.hasVInstructions()) + return false; + + EVT ScalarType = DataType.getScalarType(); + if (!isLegalElementTypeForRVV(ScalarType)) + return false; + + return true; +} + MachineInstr * RISCVTargetLowering::EmitKCFICheck(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator &MBBI, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 433b8be5c562e..a99f6fb38c8b6 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -425,6 +425,10 @@ class RISCVTargetLowering : public TargetLowering { /// alignment is legal. bool isLegalStridedLoadStore(EVT DataType, Align Alignment) const; + /// Return true if a speculative load of the given result type and + /// alignment is legal. + bool isLegalSpeculativeLoad(EVT DataType, Align Alignment) const; + unsigned getMaxSupportedInterleaveFactor() const override { return 8; } bool fallBackToDAGISel(const Instruction &Inst) const override; diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 05d504cbcb6bb..ba13bdf89063a 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -324,6 +324,11 @@ class RISCVTTIImpl final : public BasicTTIImplBase { return TLI->isLegalStridedLoadStore(DataTypeVT, Alignment); } + bool isLegalSpeculativeLoad(Type *DataType, Align Alignment) const override { + EVT DataTypeVT = TLI->getValueType(DL, DataType); + return TLI->isLegalSpeculativeLoad(DataTypeVT, Alignment); + } + bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const override { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp 
b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index c47fd9421fddd..1d7ec4f43311f 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -1760,16 +1760,39 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() { assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock && "Expected latch predecessor to be the early exiting block"); - // TODO: Handle loops that may fault. Predicates.clear(); - if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC, - &Predicates)) { + SmallVector<LoadInst *, 4> NonDerefLoads; + if (!isReadOnlyLoopWithSafeOrSpeculativeLoads(TheLoop, PSE.getSE(), DT, AC, + &NonDerefLoads, &Predicates)) { reportVectorizationFailure( "Loop may fault", "Cannot vectorize potentially faulting early exit loop", "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop); return false; } + // Check non-dereferenceable loads if any. + for (LoadInst *LI : NonDerefLoads) { + // Only support unit-stride access for now. 
+ int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand()); + if (Stride != 1) { + reportVectorizationFailure("Loop contains strided unbound access", + "Cannot vectorize early exit loop with " + "speculative strided load", + "SpeculativeNonUnitStrideLoadEarlyExitLoop", + ORE, TheLoop); + return false; + } + if (!TTI->isLegalSpeculativeLoad(LI->getType(), LI->getAlign())) { + reportVectorizationFailure("Loop may fault", + "Cannot vectorize early exit loop with " + "illegal speculative load", + "IllegalSpeculativeLoadEarlyExitLoop", ORE, + TheLoop); + return false; + } + SpeculativeLoads.insert(LI); + LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n"); + } [[maybe_unused]] const SCEV *SymbolicMaxBTC = PSE.getSymbolicMaxBackedgeTakenCount(); diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 9667b506e594f..790a5236d4f04 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10041,6 +10041,13 @@ bool LoopVectorizePass::processLoop(Loop *L) { return false; } + if (!LVL.getSpeculativeLoads().empty()) { + reportVectorizationFailure("Auto-vectorization of loops with speculative " + "load is not supported", + "SpeculativeLoadsNotSupported", ORE, L); + return false; + } + // Entrance to the VPlan-native vectorization path. Outer loops are processed // here. They may require CFG and instruction level transformations before // even evaluating whether vectorization is profitable. 
Since we cannot modify diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll b/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll new file mode 100644 index 0000000000000..07e64784da84b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/unbound-access-legality.ll @@ -0,0 +1,84 @@ +; REQUIRES: asserts +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s + +define ptr @unsupported_data_type(ptr %first, ptr %last, i128 %value) { +; CHECK-LABEL: LV: Checking a loop in 'unsupported_data_type' +; CHECK: LV: Not vectorizing: Loop may fault. +entry: + %cond = icmp eq ptr %first, %last + br i1 %cond, label %return, label %for.body + +for.body: + %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ] + %1 = load i128, ptr %first.addr, align 4 + %cond2 = icmp eq i128 %1, %value + br i1 %cond2, label %for.end, label %for.inc + +for.inc: + %first.next = getelementptr inbounds i128, ptr %first.addr, i64 1 + %cond3 = icmp eq ptr %first.next, %last + br i1 %cond3, label %for.end, label %for.body + +for.end: + %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ] + br label %return + +return: + %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ] + ret ptr %retval +} + +define ptr @unbound_strided_access(ptr %first, ptr %last, i32 %value) { +; CHECK-LABEL: LV: Checking a loop in 'unbound_strided_access' +; CHECK: LV: Not vectorizing: Loop contains strided unbound access. 
+entry: + %cond = icmp eq ptr %first, %last + br i1 %cond, label %return, label %for.body + +for.body: + %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ] + %1 = load i32, ptr %first.addr, align 4 + %cond2 = icmp eq i32 %1, %value + br i1 %cond2, label %for.end, label %for.inc + +for.inc: + %first.next = getelementptr inbounds i32, ptr %first.addr, i64 2 + %cond3 = icmp eq ptr %first.next, %last + br i1 %cond3, label %for.end, label %for.body + +for.end: + %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ] + br label %return + +return: + %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ] + ret ptr %retval +} + +define ptr @single_unbound_access(ptr %first, ptr %last, i32 %value) { +; CHECK-LABEL: LV: Checking a loop in 'single_unbound_access' +; CHECK: LV: We can vectorize this loop! +; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with speculative load is not supported. +entry: + %cond = icmp eq ptr %first, %last + br i1 %cond, label %return, label %for.body + +for.body: + %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ] + %1 = load i32, ptr %first.addr, align 4 + %cond2 = icmp eq i32 %1, %value + br i1 %cond2, label %for.end, label %for.inc + +for.inc: + %first.next = getelementptr inbounds i32, ptr %first.addr, i64 1 + %cond3 = icmp eq ptr %first.next, %last + br i1 %cond3, label %for.end, label %for.body + +for.end: + %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ] + br label %return + +return: + %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ] + ret ptr %retval +} diff --git a/llvm/unittests/Analysis/LoadsTest.cpp b/llvm/unittests/Analysis/LoadsTest.cpp index c4f5b22318e34..fab2aeb745ad0 100644 --- a/llvm/unittests/Analysis/LoadsTest.cpp +++ b/llvm/unittests/Analysis/LoadsTest.cpp @@ -195,7 +195,10 @@ loop.end: assert(Header->getName() == "loop"); Loop *L = LI.getLoopFor(Header); - return isDereferenceableReadOnlyLoop(L, &SE, &DT, &AC); + 
SmallVector<LoadInst *> NonDerefLoads; + return isReadOnlyLoopWithSafeOrSpeculativeLoads(L, &SE, &DT, &AC, + &NonDerefLoads) && + NonDerefLoads.empty(); }; ASSERT_TRUE(IsDerefReadOnlyLoop(F1));