[SCEV][LAA] Support multiplication overflow computation #155236
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis

Author: None (annamthomas)

Changes: Add support for identifying multiplication overflow in SCEV.

Full diff: https://github.com/llvm/llvm-project/pull/155236.diff

3 Files Affected:
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
index 9a2c9ba63ec7e..b5600f2b0822f 100644
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -332,14 +332,6 @@ bool llvm::isDereferenceableAndAlignedInLoop(
if (isa<SCEVCouldNotCompute>(MaxBECount))
return false;
- if (isa<SCEVCouldNotCompute>(BECount)) {
- // TODO: Support symbolic max backedge taken counts for loops without
- // computable backedge taken counts.
- MaxBECount =
- Predicates
- ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
- : SE.getConstantMaxBackedgeTakenCount(L);
- }
const auto &[AccessStart, AccessEnd] = getStartAndEndForAccess(
L, PtrScev, LI->getType(), BECount, MaxBECount, &SE, nullptr, &DT, AC);
if (isa<SCEVCouldNotCompute>(AccessStart) ||
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index bceddd0325276..258fa982ed1d0 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -193,8 +193,9 @@ RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
/// Returns \p A + \p B, if it is guaranteed not to unsigned wrap. Otherwise
/// return nullptr. \p A and \p B must have the same type.
static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
- ScalarEvolution &SE) {
- if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B))
+ ScalarEvolution &SE,
+ const Instruction *CtxI) {
+ if (!SE.willNotOverflow(Instruction::Add, /*IsSigned=*/false, A, B, CtxI))
return nullptr;
return SE.getAddExpr(A, B);
}
@@ -202,8 +203,9 @@ static const SCEV *addSCEVNoOverflow(const SCEV *A, const SCEV *B,
/// Returns \p A * \p B, if it is guaranteed not to unsigned wrap. Otherwise
/// return nullptr. \p A and \p B must have the same type.
static const SCEV *mulSCEVOverflow(const SCEV *A, const SCEV *B,
- ScalarEvolution &SE) {
- if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B))
+ ScalarEvolution &SE,
+ const Instruction *CtxI) {
+ if (!SE.willNotOverflow(Instruction::Mul, /*IsSigned=*/false, A, B, CtxI))
return nullptr;
return SE.getMulExpr(A, B);
}
@@ -232,11 +234,12 @@ evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
Type *WiderTy = SE.getWiderType(MaxBTC->getType(), Step->getType());
const SCEV *DerefBytesSCEV = SE.getConstant(WiderTy, DerefBytes);
+ // Context which dominates the entire loop.
+ auto *CtxI = L->getLoopPredecessor()->getTerminator();
// Check if we have a suitable dereferencable assumption we can use.
if (!StartPtrV->canBeFreed()) {
RetainedKnowledge DerefRK = getKnowledgeValidInContext(
- StartPtrV, {Attribute::Dereferenceable}, *AC,
- L->getLoopPredecessor()->getTerminator(), DT);
+ StartPtrV, {Attribute::Dereferenceable}, *AC, CtxI, DT);
if (DerefRK) {
DerefBytesSCEV = SE.getUMaxExpr(
DerefBytesSCEV, SE.getConstant(WiderTy, DerefRK.ArgValue));
@@ -260,12 +263,12 @@ evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
SE.getMinusSCEV(AR->getStart(), StartPtr), WiderTy);
const SCEV *OffsetAtLastIter =
- mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE);
+ mulSCEVOverflow(MaxBTC, SE.getAbsExpr(Step, /*IsNSW=*/false), SE, CtxI);
if (!OffsetAtLastIter)
return false;
const SCEV *OffsetEndBytes = addSCEVNoOverflow(
- OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE);
+ OffsetAtLastIter, SE.getNoopOrZeroExtend(EltSize, WiderTy), SE, CtxI);
if (!OffsetEndBytes)
return false;
@@ -273,7 +276,8 @@ evaluatePtrAddRecAtMaxBTCWillNotWrap(const SCEVAddRecExpr *AR,
// For positive steps, check if
// (AR->getStart() - StartPtr) + (MaxBTC * Step) + EltSize <= DerefBytes,
// while making sure none of the computations unsigned wrap themselves.
- const SCEV *EndBytes = addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE);
+ const SCEV *EndBytes =
+ addSCEVNoOverflow(StartOffset, OffsetEndBytes, SE, CtxI);
if (!EndBytes)
return false;
return SE.isKnownPredicate(CmpInst::ICMP_ULE, EndBytes, DerefBytesSCEV);
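The bound the hunk above establishes can be sketched on plain integers rather than SCEV expressions. This is a hedged illustration, not LLVM's API: the helper names mirror addSCEVNoOverflow/mulSCEVOverflow, which return nullptr (here, std::nullopt) whenever a step of the computation could itself wrap.

```cpp
#include <cstdint>
#include <optional>

// Returns A + B unless the unsigned addition wraps.
static std::optional<uint64_t> addNoWrap(uint64_t A, uint64_t B) {
  uint64_t Sum = A + B;
  if (Sum < A)
    return std::nullopt; // wrapped around
  return Sum;
}

// Returns A * B unless the unsigned multiplication wraps.
static std::optional<uint64_t> mulNoWrap(uint64_t A, uint64_t B) {
  if (B != 0 && A > UINT64_MAX / B)
    return std::nullopt; // would wrap
  return A * B;
}

// Returns true if accesses of EltSize bytes at StartOffset + i * Step,
// for i in [0, MaxBTC], stay within DerefBytes dereferenceable bytes:
// StartOffset + MaxBTC * Step + EltSize <= DerefBytes, with every
// intermediate computation proven not to wrap.
static bool accessStaysInBounds(uint64_t StartOffset, uint64_t MaxBTC,
                                uint64_t Step, uint64_t EltSize,
                                uint64_t DerefBytes) {
  std::optional<uint64_t> OffsetAtLastIter = mulNoWrap(MaxBTC, Step);
  if (!OffsetAtLastIter)
    return false;
  std::optional<uint64_t> OffsetEndBytes =
      addNoWrap(*OffsetAtLastIter, EltSize);
  if (!OffsetEndBytes)
    return false;
  std::optional<uint64_t> EndBytes = addNoWrap(StartOffset, *OffsetEndBytes);
  if (!EndBytes)
    return false;
  return *EndBytes <= DerefBytes;
}
```

For example, ten 4-byte accesses starting at offset 0 with stride 4 need exactly 40 dereferenceable bytes, and a MaxBTC large enough to wrap the multiplication makes the check fail conservatively.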
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f60a1e9f22704..2b638003043d9 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -2337,15 +2337,21 @@ bool ScalarEvolution::willNotOverflow(Instruction::BinaryOps BinOp, bool Signed,
// Can we use context to prove the fact we need?
if (!CtxI)
return false;
- // TODO: Support mul.
- if (BinOp == Instruction::Mul)
- return false;
auto *RHSC = dyn_cast<SCEVConstant>(RHS);
// TODO: Lift this limitation.
if (!RHSC)
return false;
APInt C = RHSC->getAPInt();
unsigned NumBits = C.getBitWidth();
+ if (BinOp == Instruction::Mul) {
+ // Multiplying by 0 or 1 never overflows
+ if (C.isZero() || C.isOne())
+ return true;
+ auto Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+ APInt Limit = APInt::getMaxValue(NumBits).udiv(C);
+ // To avoid overflow, we need to make sure that LHS <= MAX / C.
+ return isKnownPredicateAt(Pred, LHS, getConstant(Limit), CtxI);
+ }
bool IsSub = (BinOp == Instruction::Sub);
bool IsNegativeConst = (Signed && C.isNegative());
// Compute the direction and magnitude by which we need to check overflow.
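The new multiplication case in willNotOverflow reduces to a simple arithmetic fact: for unsigned values, LHS * C wraps iff LHS > MAX / C (with C > 1). A hedged sketch on plain 64-bit integers, using an illustrative helper name rather than LLVM's API:

```cpp
#include <cstdint>
#include <limits>

// Sketch of the unsigned-multiplication check added to willNotOverflow,
// specialized to uint64_t. LLVM performs the same comparison symbolically
// via isKnownPredicateAt on SCEV expressions.
static bool mulWillNotOverflowUnsigned(uint64_t LHS, uint64_t C) {
  // Multiplying by 0 or 1 never overflows.
  if (C == 0 || C == 1)
    return true;
  // To avoid overflow, we need LHS <= MAX / C.
  uint64_t Limit = std::numeric_limits<uint64_t>::max() / C;
  return LHS <= Limit;
}
```

The SCEV version answers the same question at a context instruction CtxI, so dominating conditions (e.g. a guard proving LHS is small) can discharge the comparison even when LHS is not a constant.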
CI: Not related to this change.
LGTM
llvm/lib/Analysis/Loads.cpp
Outdated
if (isa<SCEVCouldNotCompute>(BECount)) {
  // TODO: Support symbolic max backedge taken counts for loops without
  // computable backedge taken counts.
  MaxBECount =
      Predicates
          ? SE.getPredicatedConstantMaxBackedgeTakenCount(L, *Predicates)
          : SE.getConstantMaxBackedgeTakenCount(L);
}
Unfortunately, I think we may still not be ready to remove the workaround. I added another test (0abc8b0) that I think will regress with the workaround removed.
I see. Actually, the motivation for this patch is that we want the symbolic max BTC, so that the access size is represented in symbolic terms. We have a "fast path" downstream for Java code where the symbolic size is supported, and it requires avoiding this workaround. However, to move forward upstream, we need the following:
- Removing this workaround
- Adding support for symbolic max BTC in evaluatePtrAddRecAtMaxBTCWillNotWrap

Both of these are in the plan for upstream.
The issue with the workaround is that it blocks the "symbolic support" part for early-exit loops, and we have cases that work with the symbolic expression.
I'm not sure whether adding support for (2) will be enough to remove the workaround while still catching the regressed test case.
@fhahn My thought is that we could also add a symbolicMaxBECount argument to the getStartAndEndForAccess API and propagate that information (i.e. use both the symbolic and the constant MaxBECount for the start and end of the access).
There is a simpler alternative: put the workaround under an option that is ON by default.
The option would be turned off for the vect.stats.ll test case (to show it doesn't require the workaround), which allows this change to go through. We would also have the freedom to turn off the workaround as needed and see what support is still missing.
I'll try that out here.
Hmm, I don't think having an option is ideal. I think I managed to track down all the cases we missed w/o the workaround (all related to using information from loop guards) and put up #155672.
Add support for identifying multiplication overflow in SCEV. This is needed in LoopAccessAnalysis and that limitation was worked around by 484417a. This allows early-exit vectorization to work as expected in vect.stats.ll test without needing the workaround.