Skip to content

Conversation

@fhahn
Copy link
Contributor

@fhahn fhahn commented Dec 2, 2025

Extend the logic add in #168771 to also allow sinking stores past stores in the same noalias set by checking if we can prove no-alias via the distance between accesses, checked via SCEV.

@llvmbot
Copy link
Member

llvmbot commented Dec 2, 2025

@llvm/pr-subscribers-vectorizers

@llvm/pr-subscribers-llvm-transforms

Author: Florian Hahn (fhahn)

Changes

Extend the logic add in #168771 to also allow sinking stores past stores in the same noalias set by checking if we can prove no-alias via the distance between accesses, checked via SCEV.


Full diff: https://github.com/llvm/llvm-project/pull/170347.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+55-4)
  • (modified) llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll (+18-30)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 38024aa6897fc..bccc48ba33223 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -139,6 +139,43 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
   return true;
 }
 
+// Return true if \p A and \p B are known to not alias for all VFs in the plan,
+// checked via the distance between the accesses
+static bool isNoAliasViaDistance(VPReplicateRecipe *A, VPReplicateRecipe *B,
+                                 ScalarEvolution &SE, const Loop &L,
+                                 VPTypeAnalysis &TypeInfo) {
+  if (A->getOpcode() != Instruction::Store ||
+      B->getOpcode() != Instruction::Store)
+    return false;
+
+  VPValue *AddrA = A->getOperand(1);
+  const SCEV *SCEVA = vputils::getSCEVExprForVPValue(AddrA, SE, &L);
+  VPValue *AddrB = B->getOperand(1);
+  const SCEV *SCEVB = vputils::getSCEVExprForVPValue(AddrB, SE, &L);
+  if (isa<SCEVCouldNotCompute>(SCEVA) || isa<SCEVCouldNotCompute>(SCEVB))
+    return false;
+
+  const SCEV *Distance = SE.getMinusSCEV(SCEVA, SCEVB);
+  auto *ConstDist = dyn_cast<SCEVConstant>(Distance);
+  if (!ConstDist)
+    return false;
+
+  const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+  Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
+  TypeSize SizeA = DL.getTypeStoreSize(TyA);
+  Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
+  TypeSize SizeB = DL.getTypeStoreSize(TyB);
+
+  // Use the maximum store size to ensure no overlap from either direction.
+  uint64_t MaxStoreSize =
+      std::max(SizeA.getFixedValue(), SizeB.getFixedValue());
+  const APInt &DistValue = ConstDist->getAPInt();
+  auto VFs = B->getParent()->getPlan()->vectorFactors();
+  ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT);
+  return DistValue.abs().uge(
+      MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue());
+}
+
 // Check if a memory operation doesn't alias with memory operations in blocks
 // between FirstBB and LastBB using scoped noalias metadata.
 // For load hoisting, we only check writes in one direction.
@@ -146,7 +183,10 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
 static bool canHoistOrSinkWithNoAliasCheck(
     const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
     bool CheckReads,
-    const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr) {
+    const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr,
+    ScalarEvolution *SE = nullptr, const Loop *L = nullptr,
+    VPTypeAnalysis *TypeInfo = nullptr,
+    ArrayRef<VPReplicateRecipe *> MemOpsInGroup = {}) {
   if (!MemLoc.AATags.Scope)
     return false;
 
@@ -165,6 +205,13 @@ static bool canHoistOrSinkWithNoAliasCheck(
       if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
         continue;
 
+      // For stores, check if we can use SCEV to prove no-alias.
+      if (auto *Store = dyn_cast<VPReplicateRecipe>(&R)) {
+        if (SE && L && TypeInfo && !MemOpsInGroup.empty() &&
+            isNoAliasViaDistance(Store, MemOpsInGroup[0], *SE, *L, *TypeInfo))
+          continue;
+      }
+
       auto Loc = vputils::getMemoryLocation(R);
       if (!Loc)
         // Conservatively assume aliasing for memory operations without
@@ -4296,7 +4343,9 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
 }
 
 static bool
-canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
+canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink,
+                             ScalarEvolution *SE, const Loop *L,
+                             VPTypeAnalysis &TypeInfo) {
   auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front());
   if (!StoreLoc || !StoreLoc->AATags.Scope)
     return false;
@@ -4309,7 +4358,8 @@ canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
   VPBasicBlock *FirstBB = StoresToSink.front()->getParent();
   VPBasicBlock *LastBB = StoresToSink.back()->getParent();
   return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB,
-                                        /*CheckReads=*/true, &StoresToSinkSet);
+                                        /*CheckReads=*/true, &StoresToSinkSet,
+                                        SE, L, &TypeInfo, StoresToSink);
 }
 
 void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
@@ -4320,13 +4370,14 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
     return;
 
   VPDominatorTree VPDT(Plan);
+  VPTypeAnalysis TypeInfo(Plan);
 
   for (auto &Group : Groups) {
     sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
       return VPDT.properlyDominates(A, B);
     });
 
-    if (!canSinkStoreWithNoAliasCheck(Group))
+    if (!canSinkStoreWithNoAliasCheck(Group, &SE, L, TypeInfo))
       continue;
 
     // Use the last (most dominated) store's location for the unconditional
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
index cdbe9bb555834..7450fcccbb484 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
@@ -764,7 +764,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
 ; CHECK-NEXT:    [[INDEX:%.*]] = mul i64 [[INDEX1]], 16
 ; CHECK-NEXT:    [[IV:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP17:%.*]] = add i64 [[INDEX]], 16
@@ -781,42 +781,30 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
 ; CHECK-NEXT:    [[TMP14:%.*]] = load double, ptr [[TMP22]], align 8, !alias.scope [[META78]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1
-; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true)
 ; CHECK-NEXT:    [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00)
-; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0
-; CHECK-NEXT:    br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
-; CHECK:       [[PRED_STORE_IF]]:
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
-; CHECK-NEXT:    [[TMP19:%.*]] = extractelement <2 x double> [[TMP34]], i32 0
-; CHECK-NEXT:    store double [[TMP19]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
+; CHECK-NEXT:    [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0
+; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1
+; CHECK-NEXT:    [[TMP20:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[WIDE_LOAD]], <2 x double> [[TMP34]]
+; CHECK-NEXT:    [[TMP32:%.*]] = extractelement <2 x double> [[TMP20]], i32 0
+; CHECK-NEXT:    store double [[TMP32]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <2 x double> [[TMP20]], i32 1
+; CHECK-NEXT:    store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]]
+; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
+; CHECK-NEXT:    br i1 [[TMP23]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK:       [[PRED_STORE_IF]]:
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr i8, ptr [[TMP18]], i64 16
+; CHECK-NEXT:    store double 1.000000e+01, ptr [[TMP24]], align 8, !alias.scope [[META81]], !noalias [[META78]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE]]
 ; CHECK:       [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1
-; CHECK-NEXT:    br i1 [[TMP20]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]]
+; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
+; CHECK-NEXT:    br i1 [[TMP25]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
 ; CHECK:       [[PRED_STORE_IF2]]:
-; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
-; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <2 x double> [[TMP34]], i32 1
-; CHECK-NEXT:    store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]]
+; CHECK-NEXT:    [[TMP35:%.*]] = getelementptr i8, ptr [[TMP21]], i64 16
+; CHECK-NEXT:    store double 1.000000e+01, ptr [[TMP35]], align 8, !alias.scope [[META81]], !noalias [[META78]]
 ; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE3]]
 ; CHECK:       [[PRED_STORE_CONTINUE3]]:
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
-; CHECK-NEXT:    br i1 [[TMP23]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
-; CHECK:       [[PRED_STORE_IF4]]:
-; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
-; CHECK-NEXT:    store double [[TMP13]], ptr [[TMP31]], align 8, !alias.scope [[META81]], !noalias [[META78]]
-; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr i8, ptr [[TMP31]], i64 16
-; CHECK-NEXT:    store double 1.000000e+01, ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]]
-; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE5]]
-; CHECK:       [[PRED_STORE_CONTINUE5]]:
-; CHECK-NEXT:    [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
-; CHECK-NEXT:    br i1 [[TMP25]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
-; CHECK:       [[PRED_STORE_IF6]]:
-; CHECK-NEXT:    [[TMP32:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
-; CHECK-NEXT:    store double [[TMP14]], ptr [[TMP32]], align 8, !alias.scope [[META81]], !noalias [[META78]]
-; CHECK-NEXT:    [[TMP47:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
-; CHECK-NEXT:    store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META81]], !noalias [[META78]]
-; CHECK-NEXT:    br label %[[PRED_STORE_CONTINUE7]]
-; CHECK:       [[PRED_STORE_CONTINUE7]]:
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
 ; CHECK-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
 ; CHECK-NEXT:    br i1 [[TMP52]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP83:![0-9]+]]

Copy link
Contributor

@artagnon artagnon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like we're re-inventing parts of the various AAs, which is unavoidable, but I wonder if we can factor this out into something like VPAliasAnalysis at some point?

auto VFs = B->getParent()->getPlan()->vectorFactors();
ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT);
return DistValue.abs().uge(
MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will getFixedValue not assert here, as MaxVF could be a scalable? Also, is it possible to pass BestVF or hasn't that been decided yet?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not at the moment, we should never have replicate recipes for scalable plans

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I find this fact very non-trivial: could you kindly add a comment saying this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, added thanks

auto VFs = B->getParent()->getPlan()->vectorFactors();
ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT);
return DistValue.abs().uge(
MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I find this fact very non-trivial: could you kindly add a comment saying this?

// leader (all members of the group write to the same address with the
// same size).
if (auto *Store = dyn_cast<VPReplicateRecipe>(&R)) {
if (GroupLeader &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Didn't we reorder the arguments so that arguments after GroupLeader can be nullptr in the API? Better to have GroupLeader last, so all the others are guaranteed to be non-nullptr?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure re-ordering makes things safer, as they are still all default arguments and it still relies on the caller either passing nullptr for all or non of them. I added a dedicated helper class to encapsulate the extra functionality, to make this safer.

@github-actions
Copy link

github-actions bot commented Dec 3, 2025

🐧 Linux x64 Test Results

  • 187105 tests passed
  • 4926 tests skipped

✅ The build succeeded and all tests passed.

@fhahn fhahn force-pushed the vplan-sink-noalias-scev branch from ecdb93b to ed84aeb Compare December 3, 2025 11:34
Copy link
Contributor

@artagnon artagnon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SinkStoreInfo is a good improvement! LGTM, thanks!

const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
bool CheckReads,
const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr) {
std::optional<SinkStoreInfo> SinkInfo = std::nullopt) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
std::optional<SinkStoreInfo> SinkInfo = std::nullopt) {
std::optional<SinkStoreInfo> SinkInfo = {}) {

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks

Comment on lines 196 to 200
if (ExcludeRecipes.contains(&R))
return true;
if (auto *Store = dyn_cast<VPReplicateRecipe>(&R))
return isNoAliasViaDistance(Store, &GroupLeader);
return false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (ExcludeRecipes.contains(&R))
return true;
if (auto *Store = dyn_cast<VPReplicateRecipe>(&R))
return isNoAliasViaDistance(Store, &GroupLeader);
return false;
auto *Store = dyn_cast<VPReplicateRecipe>(&R);
return ExcludeRecipes.contains(&R) || (Store && isNoAliasViaDistance(Store, &GroupLeader));

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done thanks

uint64_t SizeA = DL.getTypeStoreSize(TyA);
Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
uint64_t SizeB = DL.getTypeStoreSize(TyB);
// Use the maximum store size to ensure no overlap from either direction.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// Use the maximum store size to ensure no overlap from either direction.
// Use the maximum store size to ensure no overlap from either direction.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done thanks

VPBasicBlock *LastBB = StoresToSink.back()->getParent();
return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB,
/*CheckReads=*/true, &StoresToSinkSet);
SinkStoreInfo Info(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
SinkStoreInfo Info(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo);
SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo);

to clearly distinguish from TypeInfo?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done thanks

if (!match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance)))
return false;

const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
const DataLayout &DL = SE.getDataLayout();

fhahn added 4 commits December 9, 2025 13:25
Extend the logic add in llvm#168771
to also allow sinking stores past stores in the same noalias set by
checking if we can prove no-alias via the distance between accesses,
checked via SCEV.
@fhahn fhahn force-pushed the vplan-sink-noalias-scev branch from c6bd47a to dbaf335 Compare December 9, 2025 14:14
@github-actions
Copy link

github-actions bot commented Dec 9, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@fhahn fhahn enabled auto-merge (squash) December 9, 2025 16:08
@fhahn fhahn merged commit c61a481 into llvm:main Dec 9, 2025
9 of 10 checks passed
llvm-sync bot pushed a commit to arm/arm-toolchain that referenced this pull request Dec 9, 2025
…rent offsets. (#170347)

Extend the logic add in llvm/llvm-project#168771
to also allow sinking stores past stores in the same noalias set by
checking if we can prove no-alias via the distance between accesses,
checked via SCEV.

PR: llvm/llvm-project#170347
@fhahn fhahn deleted the vplan-sink-noalias-scev branch December 9, 2025 16:58
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants