llvm/include/llvm/Analysis/Loads.h (+8, -0)
@@ -91,6 +91,14 @@ LLVM_ABI bool isDereferenceableReadOnlyLoop(
    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
    SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);

/// Return true if the loop \p L contains only read-only memory accesses and
/// cannot fault except through loads that are not guaranteed to be
/// dereferenceable; such loads are collected in \p SpeculativeLoads.
LLVM_ABI bool isReadOnlyLoopWithSafeOrSpeculativeLoads(
    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
    SmallVectorImpl<LoadInst *> *SpeculativeLoads,
    SmallVectorImpl<const SCEVPredicate *> *Predicates = nullptr);
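A minimal caller sketch of the new helper, for illustration only (not part of the patch; assumes a Loop *L, ScalarEvolution *SE, DominatorTree *DT, and AssumptionCache *AC are in scope):

  SmallVector<LoadInst *, 4> SpecLoads;
  SmallVector<const SCEVPredicate *, 4> Preds;
  if (isReadOnlyLoopWithSafeOrSpeculativeLoads(L, SE, DT, AC, &SpecLoads,
                                               &Preds)) {
    // The loop is read-only and cannot otherwise fault; any load collected
    // in SpecLoads must be emitted speculatively (e.g. as a fault-only-first
    // load), while all remaining loads are dereferenceable on every
    // iteration.
    for (LoadInst *LI : SpecLoads)
      LLVM_DEBUG(dbgs() << "Needs speculative lowering: " << *LI << "\n");
  }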

/// Return true if we know that executing a load from this value cannot trap.
///
/// If DT and ScanFrom are specified this method performs context-sensitive
llvm/include/llvm/Analysis/TargetTransformInfo.h (+4, -0)
@@ -1857,6 +1857,10 @@ class TargetTransformInfo {
  /// \returns True if the target supports scalable vectors.
  LLVM_ABI bool supportsScalableVectors() const;

  /// \returns True if the target supports speculative load intrinsics (e.g.,
  /// vp.load.ff).
  LLVM_ABI bool supportsSpeculativeLoads() const;
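For reference, a sketch of the IR shape of a fault-only-first load through llvm.vp.load.ff, written from memory (exact type mangling and semantics are defined by the LangRef): the intrinsic returns both the loaded vector and the number of lanes actually read before any lane would have faulted.

  %r = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %p, <vscale x 4 x i1> %m, i32 %evl)
  %vec = extractvalue { <vscale x 4 x i32>, i32 } %r, 0  ; lanes that were loaded
  %n = extractvalue { <vscale x 4 x i32>, i32 } %r, 1    ; count of lanes actually read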

  /// \return true when scalable vectorization is preferred.
  LLVM_ABI bool enableScalableVectorization() const;

llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+2, -0)
@@ -1106,6 +1106,8 @@ class TargetTransformInfoImplBase {
  virtual bool supportsScalableVectors() const { return false; }

  virtual bool supportsSpeculativeLoads() const { return false; }

  virtual bool enableScalableVectorization() const { return false; }

  virtual bool hasActiveVectorLength() const { return false; }
llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h (+8, -0)
@@ -445,6 +445,11 @@ class LoopVectorizationLegality {
  /// Returns a list of all known histogram operations in the loop.
  bool hasHistograms() const { return !Histograms.empty(); }

  /// Returns the loads that may fault and therefore must be emitted as
  /// speculative loads.
  const SmallPtrSetImpl<const Instruction *> &getSpeculativeLoads() const {
    return SpeculativeLoads;
  }

  PredicatedScalarEvolution *getPredicatedScalarEvolution() const {
    return &PSE;
  }
@@ -630,6 +635,9 @@
  /// may work on the same memory location.
  SmallVector<HistogramInfo, 1> Histograms;

  /// Holds all loads that must be emitted as speculative loads.
  SmallPtrSet<const Instruction *, 4> SpeculativeLoads;

  /// BFI and PSI are used to check for profile guided size optimizations.
  BlockFrequencyInfo *BFI;
  ProfileSummaryInfo *PSI;
llvm/lib/Analysis/Loads.cpp (+18, -0)
@@ -870,3 +870,21 @@ bool llvm::isDereferenceableReadOnlyLoop(
  }
  return true;
}

bool llvm::isReadOnlyLoopWithSafeOrSpeculativeLoads(
    Loop *L, ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
    SmallVectorImpl<LoadInst *> *SpeculativeLoads,
    SmallVectorImpl<const SCEVPredicate *> *Predicates) {
  for (BasicBlock *BB : L->blocks()) {
    for (Instruction &I : *BB) {
      if (auto *LI = dyn_cast<LoadInst>(&I)) {
        if (!isDereferenceableAndAlignedInLoop(LI, L, *SE, *DT, AC, Predicates))
          SpeculativeLoads->push_back(LI);
      } else if (I.mayReadFromMemory() || I.mayWriteToMemory() ||
                 I.mayThrow()) {
        return false;
      }
    }
  }
  return true;
}
llvm/lib/Analysis/TargetTransformInfo.cpp (+4, -0)
@@ -1457,6 +1457,10 @@ bool TargetTransformInfo::supportsScalableVectors() const {
  return TTIImpl->supportsScalableVectors();
}

bool TargetTransformInfo::supportsSpeculativeLoads() const {
  return TTIImpl->supportsSpeculativeLoads();
}

bool TargetTransformInfo::enableScalableVectorization() const {
  return TTIImpl->enableScalableVectorization();
}
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (+3, -0)
@@ -110,6 +110,9 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
  bool supportsScalableVectors() const override {
    return ST->hasVInstructions();
  }
  bool supportsSpeculativeLoads() const override {
    return ST->hasVInstructions();
  }
  bool enableOrderedReductions() const override { return true; }
  bool enableScalableVectorization() const override {
    return ST->hasVInstructions();
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+29, -3)
@@ -1760,16 +1760,42 @@ bool LoopVectorizationLegality::isVectorizableEarlyExitLoop() {
   assert(LatchBB->getUniquePredecessor() == SingleUncountableExitingBlock &&
          "Expected latch predecessor to be the early exiting block");

-  // TODO: Handle loops that may fault.
   Predicates.clear();
-  if (!isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
-                                     &Predicates)) {
+  SmallVector<LoadInst *, 4> NonDerefLoads;
+  bool HasSafeAccess =
+      TTI->supportsSpeculativeLoads()
+          ? isReadOnlyLoopWithSafeOrSpeculativeLoads(
+                TheLoop, PSE.getSE(), DT, AC, &NonDerefLoads, &Predicates)
+          : isDereferenceableReadOnlyLoop(TheLoop, PSE.getSE(), DT, AC,
+                                          &Predicates);
+  if (!HasSafeAccess) {
     reportVectorizationFailure(
         "Loop may fault",
         "Cannot vectorize potentially faulting early exit loop",
         "PotentiallyFaultingEarlyExitLoop", ORE, TheLoop);
     return false;
   }
+  // Speculative loads need to be unit-stride.
+  for (LoadInst *LI : NonDerefLoads) {
+    int Stride = isConsecutivePtr(LI->getType(), LI->getPointerOperand());
+    if (Stride != 1) {
+      reportVectorizationFailure("Loop contains strided unbound access",
+                                 "Cannot vectorize early exit loop with "
+                                 "speculative non-unit-stride load",
+                                 "SpeculativeNonUnitStrideLoadEarlyExitLoop",
+                                 ORE, TheLoop);
+      return false;
+    }
+    SpeculativeLoads.insert(LI);
+    LLVM_DEBUG(dbgs() << "LV: Found speculative load: " << *LI << "\n");
+  }
+  // Support single speculative load for now.
+  if (NonDerefLoads.size() > 1) {
+    reportVectorizationFailure("Loop contains more than one unbound access",
+                               "TooManySpeculativeLoadInEarlyExitLoop", ORE,
+                               TheLoop);
+    return false;
+  }

   [[maybe_unused]] const SCEV *SymbolicMaxBTC =
       PSE.getSymbolicMaxBackedgeTakenCount();
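Illustration only, beyond the scope of this patch: the unit-stride restriction exists because a fault-only-first load reads a contiguous run of lanes and reports how many it read before one would have faulted, so the effective vector length can simply be truncated at the fault; a strided or gathered access has no contiguous prefix to truncate to. A hedged sketch of how a recorded speculative load such as the one in the tests below might eventually be emitted (hypothetical names, assuming an all-true mask):

  ; read up to %evl contiguous lanes starting at %first.addr; %n then caps all
  ; later lane-wise operations, including the early-exit comparison
  %ff = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %first.addr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
  %data = extractvalue { <vscale x 4 x i32>, i32 } %ff, 0
  %n = extractvalue { <vscale x 4 x i32>, i32 } %ff, 1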
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+7, -0)
@@ -10041,6 +10041,13 @@ bool LoopVectorizePass::processLoop(Loop *L) {
    return false;
  }

  if (!LVL.getSpeculativeLoads().empty()) {
    reportVectorizationFailure("Auto-vectorization of loops with speculative "
                               "load is not supported",
                               "SpeculativeLoadsNotSupported", ORE, L);
    return false;
  }

  // Entrance to the VPlan-native vectorization path. Outer loops are processed
  // here. They may require CFG and instruction level transformations before
  // even evaluating whether vectorization is profitable. Since we cannot modify
New test file (+89, -0)
@@ -0,0 +1,89 @@
; REQUIRES: asserts
; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 -mtriple=riscv64 -mattr=+v -S %s | FileCheck %s

define ptr @two_unbound_access(ptr %first, ptr %last, ptr %addr2) {
; CHECK-LABEL: LV: Checking a loop in 'two_unbound_access'
; CHECK: LV: Not vectorizing: Loop contains more than one unbound access.
entry:
  %cond = icmp eq ptr %first, %last
  br i1 %cond, label %return, label %for.body

for.body:
  %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
  %match.addr = phi ptr [ %addr2, %entry ], [ %match.next, %for.inc ]
  %0 = load i32, ptr %first.addr, align 4
  %match.value = load i32, ptr %match.addr, align 4
  %cmp1 = icmp eq i32 %0, %match.value
  br i1 %cmp1, label %early.exit, label %for.inc

for.inc:
  %match.next = getelementptr inbounds nuw i8, ptr %match.addr, i64 4
  %first.next = getelementptr inbounds i8, ptr %first.addr, i64 4
  %exit = icmp eq ptr %first.next, %last
  br i1 %exit, label %main.exit, label %for.body

early.exit:
  br label %return

main.exit:
  br label %return

return:
  %retval = phi ptr [ %first, %entry ], [ %last, %main.exit ], [ %first.addr, %early.exit ]
  ret ptr %retval
}

define ptr @unbound_strided_access(ptr %first, ptr %last, i32 %value) {
; CHECK-LABEL: LV: Checking a loop in 'unbound_strided_access'
; CHECK: LV: Not vectorizing: Loop contains strided unbound access.
entry:
  %cond = icmp eq ptr %first, %last
  br i1 %cond, label %return, label %for.body

for.body:
  %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
  %0 = load i32, ptr %first.addr, align 4
  %cond2 = icmp eq i32 %0, %value
  br i1 %cond2, label %for.end, label %for.inc

for.inc:
  %first.next = getelementptr inbounds i32, ptr %first.addr, i64 2
  %cond3 = icmp eq ptr %first.next, %last
  br i1 %cond3, label %for.end, label %for.body

for.end:
  %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
  br label %return

return:
  %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
  ret ptr %retval
}

define ptr @single_unbound_access(ptr %first, ptr %last, i32 %value) {
; CHECK-LABEL: LV: Checking a loop in 'single_unbound_access'
; CHECK: LV: We can vectorize this loop!
; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with speculative load is not supported.
entry:
  %cond = icmp eq ptr %first, %last
  br i1 %cond, label %return, label %for.body

for.body:
  %first.addr = phi ptr [ %first, %entry ], [ %first.next, %for.inc ]
  %0 = load i32, ptr %first.addr, align 4
  %cond2 = icmp eq i32 %0, %value
  br i1 %cond2, label %for.end, label %for.inc

for.inc:
  %first.next = getelementptr inbounds i32, ptr %first.addr, i64 1
  %cond3 = icmp eq ptr %first.next, %last
  br i1 %cond3, label %for.end, label %for.body

for.end:
  %retval.ph = phi ptr [ %first.addr, %for.body ], [ %last, %for.inc ]
  br label %return

return:
  %retval = phi ptr [ %first, %entry ], [ %retval.ph, %for.end ]
  ret ptr %retval
}