llvm/include/llvm/Analysis/Loads.h (+8, -0)
@@ -91,6 +91,14 @@ LLVM_ABI bool isDereferenceableReadOnlyLoop(
    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
    SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);

/// Return true if the loop \p L contains only read-only memory accesses and
/// cannot fault except through loads that are not guaranteed to be
/// dereferenceable; such loads are collected in \p SpeculativeLoads.
LLVM_ABI bool isReadOnlyLoopWithSafeOrSpeculativeLoads(
    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
    SmallVectorImpl<LoadInst *> *SpeculativeLoads,
    SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
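A minimal caller sketch of the new helper, for illustration only (not part of the patch; assumes a Loop *L, ScalarEvolution *SE, DominatorTree *DT, and AssumptionCache *AC are in scope):

  SmallVector<LoadInst *, 4> SpecLoads;
  SmallVector<const SCEVPredicate *, 4> Preds;
  if (isReadOnlyLoopWithSafeOrSpeculativeLoads(L, SE, DT, AC, &SpecLoads,
                                               &Preds)) {
    // The loop is read-only and cannot otherwise fault; any load collected
    // in SpecLoads must be emitted speculatively (e.g. as a fault-only-first
    // load), while all remaining loads are dereferenceable on every
    // iteration.
    for (LoadInst *LI : SpecLoads)
      LLVM_DEBUG(dbgs() << "Needs speculative lowering: " << *LI << "\n");
  }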

/// Return true if we know that executing a load from this value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
llvm/include/llvm/Analysis/TargetTransformInfo.h (+4, -0)
@@ -1857,6 +1857,10 @@ class TargetTransformInfo {
  /// \returns True if the target supports scalable vectors.
  LLVM_ABI bool supportsScalableVectors() const;

  /// \returns True if the target supports speculative load intrinsics (e.g.,
  /// vp.load.ff).
  LLVM_ABI bool supportsSpeculativeLoads() const;
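For reference, a sketch of the IR shape of a fault-only-first load through llvm.vp.load.ff, written from memory (exact type mangling and semantics are defined by the LangRef): the intrinsic returns both the loaded vector and the number of lanes actually read before any lane would have faulted.

  %r = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %p, <vscale x 4 x i1> %m, i32 %evl)
  %vec = extractvalue { <vscale x 4 x i32>, i32 } %r, 0  ; lanes that were loaded
  %n = extractvalue { <vscale x 4 x i32>, i32 } %r, 1    ; count of lanes actually read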

  /// \return true when scalable vectorization is preferred.
  LLVM_ABI bool enableScalableVectorization() const;

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+2, -0)
@@ -1106,6 +1106,8 @@ class TargetTransformInfoImplBase {
  virtual bool supportsScalableVectors() const { return false; }

  virtual bool supportsSpeculativeLoads() const { return false; }

  virtual bool enableScalableVectorization() const { return false; }

  virtual bool hasActiveVectorLength() const { return false; }
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h (+8, -0)
@@ -445,6 +445,11 @@ class LoopVectorizationLegality {
  /// Returns a list of all known histogram operations in the loop.
  bool hasHistograms() const { return !Histograms.empty(); }

  /// Returns the loads that may fault and therefore must be emitted as
  /// speculative loads.
  const SmallPtrSetImpl<const Instruction *> &getSpeculativeLoads() const {
    return SpeculativeLoads;
  }

  PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
    return &PSE;
  }
@@ -630,6 +635,9 @@
  /// may work on the same memory location.
  SmallVector<HistogramInfo, 1> Histograms;

  /// Holds all loads that must be emitted as speculative loads.
  SmallPtrSet<const Instruction *, 4> SpeculativeLoads;

  /// BFI and PSI are used to check for profile guided size optimizations.
  BlockFrequencyInfo *BFI;
  ProfileSummaryInfo *PSI;
llvm/lib/Analysis/Loads.cpp (+18, -0)
@@ -870,3 +870,21 @@ bool llvm::isDereferenceableReadOnlyLoop(
  }
  return true;
}

bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads(
    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
    SmallVectorImpl<LoadInst *> *SpeculativeLoads,
    SmallVectorImpl<const SCEVPredicate *> *Predicates) {
  for (BasicBlock *BB : L->blocks()) {
    for (Instruction &I : *BB) {
      if (auto *LI = dyn_cast<LoadInst>(&I)) {
        if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
          SpeculativeLoads->push_back(LI);
      } else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
                 I.mayThrow()) {
        return false;
      }
    }
  }
  return true;
}
llvm/lib/Analysis/TargetTransformInfo.cpp (+4, -0)
@@ -1457,6 +1457,10 @@ bool TargetTransformInfo::supportsScalableVectors() const {
  return TTIImpl->supportsScalableVectors();
}

bool TargetTransformInfo::supportsSpeculativeLoads() const {
  return TTIImpl->supportsSpeculativeLoads();
}

bool TargetTransformInfo::enableScalableVectorization() const {
  return TTIImpl->enableScalableVectorization();
}
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+3, -0)
@@ -110,6 +110,9 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
  bool supportsScalableVectors() const override {
    return ST->hasVInstructions();
  }
  bool supportsSpeculativeLoads() const override {
    return ST->hasVInstructions();
  }
  bool enableOrderedReductions() const override { return true; }
  bool enableScalableVectorization() const override {
    return ST->hasVInstructions();
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+29, -3)
@@ -1760,16 +1760,42 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
          "Expected latch predecessor to be the early exiting block");

-  // TODO: Handle loops that may fault.
   Predicates.clear();
-  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
-                                     &Predicates)) {
+  SmallVector<LoadInst *, 4> NonDerefLoads;
+  bool HasSafeAccess =
+      TTI->supportsSpeculativeLoads()
+          ? isReadOnlyLoopWithSafeOrSpeculativeLoads(
+                TheLoop, PSE.getSE(), DT, AC, &NonDerefLoads, &Predicates)
+          : isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
+                                          &Predicates);
+  if (!HasSafeAccess) {
     reportVectorizationFailure(
         "Loop may fault",
         "Cannot vectorize potentially faulting early exit loop",
         "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
     return false;
   }
+  // Speculative loads need to be unit-stride.
+  for (LoadInst *LI : NonDerefLoads) {
+    int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
+    if (Stride != 1) {
+      reportVectorizationFailure("Loop contains strided unbound access",
+                                 "Cannot vectorize early exit loop with "
+                                 "speculative non-unit-stride load",
+                                 "SpeculativeNonUnitStrideLoadEarlyExitLoop",
+                                 ORE, TheLoop);
+      return false;
+    }
+    SpeculativeLoads.insert(LI);
+    LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n");
+  }
+  // Support single speculative load for now.
+  if (NonDerefLoads.size() > 1) {
+    reportVectorizationFailure("Loop contains more than one unbound access",
+                               "TooManySpeculativeLoadInEarlyExitLoop", ORE,
+                               TheLoop);
+    return false;
+  }

   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
       PSE.getSymbolicMaxBackedgeTakenCount();
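Illustration only, beyond the scope of this patch: the unit-stride restriction exists because a fault-only-first load reads a contiguous run of lanes and reports how many it read before one would have faulted, so the effective vector length can simply be truncated at the fault; a strided or gathered access has no contiguous prefix to truncate to. A hedged sketch of how a recorded speculative load such as the one in the tests below might eventually be emitted (hypothetical names, assuming an all-true mask):

  ; read up to %evl contiguous lanes starting at %first.addr; %n then caps all
  ; later lane-wise operations, including the early-exit comparison
  %ff = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %first.addr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %data = extractvalue { <vscale x 4 x i32>, i32 } %ff, 0
  %n = extractvalue { <vscale x 4 x i32>, i32 } %ff, 1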
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+7, -0)
@@ -10041,6 +10041,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
    return false;
  }

  if (!LVL.getSpeculativeLoads().empty()) {
    reportVectorizationFailure("Auto-vectorization of loops with speculative "
                               "load is not supported",
                               "SpeculativeLoadsNotSupported", ORE, L);
    return false;
  }

  // Entrance to the VPlan-native vectorization path. Outer loops are processed
  // here. They may require CFG and instruction level transformations before
  // even evaluating whether vectorization is profitable. Since we cannot modify
New test file (+89, -0)
@@ -0,0 +1,89 @@
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s

define ptr @two_unbound_access(ptr %first, ptr %last, ptr %addr2) {
; CHECK-LABEL: LV: Checking a loop in 'two_unbound_access'
; CHECK: LV: Not vectorizing: Loop contains more than one unbound access.
entry:
  %cond = icmp eq ptr %first, %last
  br i1 %cond, label %return, label %for.body

for.body:
  %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
  %match.addr = phi ptr [ %addr2, %entry ], [ %match.next, %for.inc ]
  %0 = load i32, ptr %first.addr, align 4
  %match.value = load i32, ptr %match.addr, align 4
  %cmp1 = icmp eq i32 %0, %match.value
  br i1 %cmp1, label %early.exit, label %for.inc

for.inc:
  %match.next = getelementptr inbounds nuw i8, ptr %match.addr, i64 4
  %first.next = getelementptr inbounds i8, ptr %first.addr, i64 4
  %exit = icmp eq ptr %first.next, %last
  br i1 %exit, label %main.exit, label %for.body

early.exit:
  br label %return

main.exit:
  br label %return

return:
  %retval = phi ptr [ %first, %entry ], [ %last, %main.exit ], [ %first.addr, %early.exit ]
  ret ptr %retval
}

define ptr @unbound_strided_access(ptr %first, ptr %last, i32 %value) {
; CHECK-LABEL: LV: Checking a loop in 'unbound_strided_access'
; CHECK: LV: Not vectorizing: Loop contains strided unbound access.
entry:
  %cond = icmp eq ptr %first, %last
  br i1 %cond, label %return, label %for.body

for.body:
  %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
  %0 = load i32, ptr %first.addr, align 4
  %cond2 = icmp eq i32 %0, %value
  br i1 %cond2, label %for.end, label %for.inc

for.inc:
  %first.next = getelementptr inbounds i32, ptr %first.addr, i64 2
  %cond3 = icmp eq ptr %first.next, %last
  br i1 %cond3, label %for.end, label %for.body

for.end:
  %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
  br label %return

return:
  %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
  ret ptr %retval
}

define ptr @single_unbound_access(ptr %first, ptr %last, i32 %value) {
; CHECK-LABEL: LV: Checking a loop in 'single_unbound_access'
; CHECK: LV: We can vectorize this loop!
; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with speculative load is not supported.
entry:
  %cond = icmp eq ptr %first, %last
  br i1 %cond, label %return, label %for.body

for.body:
  %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
  %0 = load i32, ptr %first.addr, align 4
  %cond2 = icmp eq i32 %0, %value
  br i1 %cond2, label %for.end, label %for.inc

for.inc:
  %first.next = getelementptr inbounds i32, ptr %first.addr, i64 1
  %cond3 = icmp eq ptr %first.next, %last
  br i1 %cond3, label %for.end, label %for.body

for.end:
  %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
  br label %return

return:
  %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
  ret ptr %retval
}