From 6cf4303ab1d78ac9b9350b4ba339df070b96488a Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 2 Dec 2025 18:34:54 +0000 Subject: [PATCH 1/5] [VPlan] Use SCEV to prove non-aliasing for stores at different offsets. Extend the logic added in https://github.com/llvm/llvm-project/pull/168771 to also allow sinking stores past stores in the same noalias set by checking if we can prove no-alias via the distance between accesses, checked via SCEV. --- .../Transforms/Vectorize/VPlanTransforms.cpp | 59 +++++++++++++++++-- ...predicated-loads-with-predicated-stores.ll | 48 ++++++--------- 2 files changed, 73 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index eb078c783d5f7..4331dc7bb88f0 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -139,6 +139,43 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( return true; } +// Return true if \p A and \p B are known to not alias for all VFs in the plan, +// checked via the distance between the accesses +static bool isNoAliasViaDistance(VPReplicateRecipe *A, VPReplicateRecipe *B, + ScalarEvolution &SE, const Loop &L, + VPTypeAnalysis &TypeInfo) { + if (A->getOpcode() != Instruction::Store || + B->getOpcode() != Instruction::Store) + return false; + + VPValue *AddrA = A->getOperand(1); + const SCEV *SCEVA = vputils::getSCEVExprForVPValue(AddrA, SE, &L); + VPValue *AddrB = B->getOperand(1); + const SCEV *SCEVB = vputils::getSCEVExprForVPValue(AddrB, SE, &L); + if (isa(SCEVA) || isa(SCEVB)) + return false; + + const SCEV *Distance = SE.getMinusSCEV(SCEVA, SCEVB); + auto *ConstDist = dyn_cast(Distance); + if (!ConstDist) + return false; + + const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); + Type *TyA = TypeInfo.inferScalarType(A->getOperand(0)); + TypeSize SizeA = DL.getTypeStoreSize(TyA); + Type *TyB = 
TypeInfo.inferScalarType(B->getOperand(0)); + TypeSize SizeB = DL.getTypeStoreSize(TyB); + + // Use the maximum store size to ensure no overlap from either direction. + uint64_t MaxStoreSize = + std::max(SizeA.getFixedValue(), SizeB.getFixedValue()); + const APInt &DistValue = ConstDist->getAPInt(); + auto VFs = B->getParent()->getPlan()->vectorFactors(); + ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT); + return DistValue.abs().uge( + MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue()); +} + // Check if a memory operation doesn't alias with memory operations in blocks // between FirstBB and LastBB using scoped noalias metadata. // For load hoisting, we only check writes in one direction. @@ -146,7 +183,10 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( static bool canHoistOrSinkWithNoAliasCheck( const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, bool CheckReads, - const SmallPtrSetImpl *ExcludeRecipes = nullptr) { + const SmallPtrSetImpl *ExcludeRecipes = nullptr, + ScalarEvolution *SE = nullptr, const Loop *L = nullptr, + VPTypeAnalysis *TypeInfo = nullptr, + ArrayRef MemOpsInGroup = {}) { if (!MemLoc.AATags.Scope) return false; @@ -165,6 +205,13 @@ static bool canHoistOrSinkWithNoAliasCheck( if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory())) continue; + // For stores, check if we can use SCEV to prove no-alias. 
+ if (auto *Store = dyn_cast(&R)) { + if (SE && L && TypeInfo && !MemOpsInGroup.empty() && + isNoAliasViaDistance(Store, MemOpsInGroup[0], *SE, *L, *TypeInfo)) + continue; + } + auto Loc = vputils::getMemoryLocation(R); if (!Loc) // Conservatively assume aliasing for memory operations without @@ -4301,7 +4348,9 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, } static bool -canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink) { +canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink, + ScalarEvolution *SE, const Loop *L, + VPTypeAnalysis &TypeInfo) { auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front()); if (!StoreLoc || !StoreLoc->AATags.Scope) return false; @@ -4314,7 +4363,8 @@ canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink) { VPBasicBlock *FirstBB = StoresToSink.front()->getParent(); VPBasicBlock *LastBB = StoresToSink.back()->getParent(); return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, - /*CheckReads=*/true, &StoresToSinkSet); + /*CheckReads=*/true, &StoresToSinkSet, + SE, L, &TypeInfo, StoresToSink); } void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, @@ -4325,13 +4375,14 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, return; VPDominatorTree VPDT(Plan); + VPTypeAnalysis TypeInfo(Plan); for (auto &Group : Groups) { sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { return VPDT.properlyDominates(A, B); }); - if (!canSinkStoreWithNoAliasCheck(Group)) + if (!canSinkStoreWithNoAliasCheck(Group, &SE, L, TypeInfo)) continue; // Use the last (most dominated) store's location for the unconditional diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index cdbe9bb555834..7450fcccbb484 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ 
b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -764,7 +764,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ] ; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16 ; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16 @@ -781,42 +781,30 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP22]], align 8, !alias.scope [[META78]] ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true) ; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00) -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP34]], i32 0 -; CHECK-NEXT: store double [[TMP19]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP10]], <2 x double> 
[[WIDE_LOAD]], <2 x double> [[TMP34]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP20]], i32 0 +; CHECK-NEXT: store double [[TMP32]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]] +; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP20]], i32 1 +; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP18]], i64 16 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP24]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_IF2]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP34]], i32 1 -; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP21]], i64 16 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP35]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_CONTINUE3]]: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] -; CHECK: [[PRED_STORE_IF4]]: -; CHECK-NEXT: [[TMP31:%.*]] = 
getelementptr double, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store double [[TMP13]], ptr [[TMP31]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP31]], i64 16 -; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] -; CHECK: [[PRED_STORE_CONTINUE5]]: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] -; CHECK: [[PRED_STORE_IF6]]: -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: store double [[TMP14]], ptr [[TMP32]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 -; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; CHECK: [[PRED_STORE_CONTINUE7]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP52]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP83:![0-9]+]] From b7f5e0dffe11a7830f86e1aebd2a83f1b815146c Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 3 Dec 2025 11:15:24 +0000 Subject: [PATCH 2/5] !fixup address comments, thanks --- .../Transforms/Vectorize/VPlanTransforms.cpp | 41 ++++++++++--------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 4331dc7bb88f0..26993e7eeeabf 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -44,6 +44,7 @@ using namespace llvm; using namespace VPlanPatternMatch; +using namespace SCEVPatternMatch; bool 
VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( VPlan &Plan, @@ -155,38 +156,36 @@ static bool isNoAliasViaDistance(VPReplicateRecipe *A, VPReplicateRecipe *B, if (isa(SCEVA) || isa(SCEVB)) return false; - const SCEV *Distance = SE.getMinusSCEV(SCEVA, SCEVB); - auto *ConstDist = dyn_cast(Distance); - if (!ConstDist) + const APInt *Distance; + if (!match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance))) return false; const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); Type *TyA = TypeInfo.inferScalarType(A->getOperand(0)); - TypeSize SizeA = DL.getTypeStoreSize(TyA); + uint64_t SizeA = DL.getTypeStoreSize(TyA); Type *TyB = TypeInfo.inferScalarType(B->getOperand(0)); - TypeSize SizeB = DL.getTypeStoreSize(TyB); - + uint64_t SizeB = DL.getTypeStoreSize(TyB); // Use the maximum store size to ensure no overlap from either direction. - uint64_t MaxStoreSize = - std::max(SizeA.getFixedValue(), SizeB.getFixedValue()); - const APInt &DistValue = ConstDist->getAPInt(); + uint64_t MaxStoreSize = std::max(SizeA, SizeB); + auto VFs = B->getParent()->getPlan()->vectorFactors(); ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT); - return DistValue.abs().uge( + return Distance->abs().uge( MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue()); } // Check if a memory operation doesn't alias with memory operations in blocks -// between FirstBB and LastBB using scoped noalias metadata. -// For load hoisting, we only check writes in one direction. -// For store sinking, we check both reads and writes bidirectionally. +// between FirstBB and LastBB using scoped noalias metadata. If \p CheckReads is +// false, we only check recipes that may write to memory. Otherwise we check +// recipes that both read and write memory. If a \p GroupLeader is passed, SCEV +// is used to try to prove no-alias between \p GroupLeader and other replicate +// recipes. 
static bool canHoistOrSinkWithNoAliasCheck( const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, bool CheckReads, const SmallPtrSetImpl *ExcludeRecipes = nullptr, - ScalarEvolution *SE = nullptr, const Loop *L = nullptr, - VPTypeAnalysis *TypeInfo = nullptr, - ArrayRef MemOpsInGroup = {}) { + VPReplicateRecipe *GroupLeader = nullptr, ScalarEvolution *SE = nullptr, + const Loop *L = nullptr, VPTypeAnalysis *TypeInfo = nullptr) { if (!MemLoc.AATags.Scope) return false; @@ -205,10 +204,12 @@ static bool canHoistOrSinkWithNoAliasCheck( if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory())) continue; - // For stores, check if we can use SCEV to prove no-alias. + // For stores, check if we can use SCEV to prove no-alias with the group + // leader (all members of the group write to the same address with the + // same size). if (auto *Store = dyn_cast(&R)) { - if (SE && L && TypeInfo && !MemOpsInGroup.empty() && - isNoAliasViaDistance(Store, MemOpsInGroup[0], *SE, *L, *TypeInfo)) + if (GroupLeader && + isNoAliasViaDistance(Store, GroupLeader, *SE, *L, *TypeInfo)) continue; } @@ -4364,7 +4365,7 @@ canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink, VPBasicBlock *LastBB = StoresToSink.back()->getParent(); return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, /*CheckReads=*/true, &StoresToSinkSet, - SE, L, &TypeInfo, StoresToSink); + StoresToSink[0], SE, L, &TypeInfo); } void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, From 3f0b7139beaadc315a9e9b421175a5d0258b48a5 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Mon, 8 Dec 2025 13:26:19 +0000 Subject: [PATCH 3/5] !fixup --- .../Transforms/Vectorize/VPlanTransforms.cpp | 128 ++++++++++-------- 1 file changed, 71 insertions(+), 57 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 26993e7eeeabf..b83e08e14cf0b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp 
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -140,52 +140,77 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes( return true; } -// Return true if \p A and \p B are known to not alias for all VFs in the plan, -// checked via the distance between the accesses -static bool isNoAliasViaDistance(VPReplicateRecipe *A, VPReplicateRecipe *B, - ScalarEvolution &SE, const Loop &L, - VPTypeAnalysis &TypeInfo) { - if (A->getOpcode() != Instruction::Store || - B->getOpcode() != Instruction::Store) - return false; +/// Helper for extra no-alias checks via known-safe recipe and SCEV. +class SinkStoreInfo { + const SmallPtrSetImpl &ExcludeRecipes; + VPReplicateRecipe &GroupLeader; + ScalarEvolution &SE; + const Loop &L; + VPTypeAnalysis &TypeInfo; + + // Return true if \p A and \p B are known to not alias for all VFs in the + // plan, checked via the distance between the accesses + bool isNoAliasViaDistance(VPReplicateRecipe *A, VPReplicateRecipe *B) const { + if (A->getOpcode() != Instruction::Store || + B->getOpcode() != Instruction::Store) + return false; - VPValue *AddrA = A->getOperand(1); - const SCEV *SCEVA = vputils::getSCEVExprForVPValue(AddrA, SE, &L); - VPValue *AddrB = B->getOperand(1); - const SCEV *SCEVB = vputils::getSCEVExprForVPValue(AddrB, SE, &L); - if (isa(SCEVA) || isa(SCEVB)) - return false; + VPValue *AddrA = A->getOperand(1); + const SCEV *SCEVA = vputils::getSCEVExprForVPValue(AddrA, SE, &L); + VPValue *AddrB = B->getOperand(1); + const SCEV *SCEVB = vputils::getSCEVExprForVPValue(AddrB, SE, &L); + if (isa(SCEVA) || isa(SCEVB)) + return false; - const APInt *Distance; - if (!match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance))) - return false; + const APInt *Distance; + if (!match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance))) + return false; - const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); - Type *TyA = TypeInfo.inferScalarType(A->getOperand(0)); - uint64_t SizeA = DL.getTypeStoreSize(TyA); 
- Type *TyB = TypeInfo.inferScalarType(B->getOperand(0)); - uint64_t SizeB = DL.getTypeStoreSize(TyB); - // Use the maximum store size to ensure no overlap from either direction. - uint64_t MaxStoreSize = std::max(SizeA, SizeB); - - auto VFs = B->getParent()->getPlan()->vectorFactors(); - ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT); - return Distance->abs().uge( - MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue()); -} + const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); + Type *TyA = TypeInfo.inferScalarType(A->getOperand(0)); + uint64_t SizeA = DL.getTypeStoreSize(TyA); + Type *TyB = TypeInfo.inferScalarType(B->getOperand(0)); + uint64_t SizeB = DL.getTypeStoreSize(TyB); + // Use the maximum store size to ensure no overlap from either direction. + // Currently only handles fixed sizes, as it is only used for + // replicating VPReplicateRecipes. + uint64_t MaxStoreSize = std::max(SizeA, SizeB); + + auto VFs = B->getParent()->getPlan()->vectorFactors(); + ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT); + return Distance->abs().uge( + MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue()); + } + +public: + SinkStoreInfo(const SmallPtrSetImpl &ExcludeRecipes, + VPReplicateRecipe &GroupLeader, ScalarEvolution &SE, + const Loop &L, VPTypeAnalysis &TypeInfo) + : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), SE(SE), L(L), + TypeInfo(TypeInfo) {} + + /// Return true if \p R should be skipped during alias checking, either + /// because it's in the exclude set or because no-alias can be proven via + /// SCEV. + bool shouldSkip(VPRecipeBase &R) const { + if (ExcludeRecipes.contains(&R)) + return true; + if (auto *Store = dyn_cast(&R)) + return isNoAliasViaDistance(Store, &GroupLeader); + return false; + } +}; -// Check if a memory operation doesn't alias with memory operations in blocks -// between FirstBB and LastBB using scoped noalias metadata. 
If \p CheckReads is -// false, we only check recipes that may write to memory. Otherwise we check -// recipes that both read and write memory. If a \p GroupLeader is passed, SCEV -// is used to try to prove no-alias between \p GroupLeader and other replicate -// recipes. +/// Check if a memory operation doesn't alias with memory operations in blocks +/// between \p FirstBB and \p LastBB using scoped noalias metadata. If +/// \p SinkInfo is std::nullopt, only recipes that may write to memory are +/// checked (for load hoisting). Otherwise recipes that both read and write +/// memory are checked, and SCEV is used to prove no-alias between the group +/// leader and other replicate recipes (for store sinking). static bool canHoistOrSinkWithNoAliasCheck( const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, - bool CheckReads, - const SmallPtrSetImpl *ExcludeRecipes = nullptr, - VPReplicateRecipe *GroupLeader = nullptr, ScalarEvolution *SE = nullptr, - const Loop *L = nullptr, VPTypeAnalysis *TypeInfo = nullptr) { + std::optional SinkInfo = std::nullopt) { + bool CheckReads = SinkInfo.has_value(); if (!MemLoc.AATags.Scope) return false; @@ -197,22 +222,13 @@ static bool canHoistOrSinkWithNoAliasCheck( "Expected at most one successor in block chain"); auto *VPBB = cast(Block); for (VPRecipeBase &R : *VPBB) { - if (ExcludeRecipes && ExcludeRecipes->contains(&R)) + if (SinkInfo && SinkInfo->shouldSkip(R)) continue; // Skip recipes that don't need checking. if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory())) continue; - // For stores, check if we can use SCEV to prove no-alias with the group - // leader (all members of the group write to the same address with the - // same size). 
- if (auto *Store = dyn_cast(&R)) { - if (GroupLeader && - isNoAliasViaDistance(Store, GroupLeader, *SE, *L, *TypeInfo)) - continue; - } - auto Loc = vputils::getMemoryLocation(R); if (!Loc) // Conservatively assume aliasing for memory operations without @@ -4321,8 +4337,7 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, // Check that the load doesn't alias with stores between first and last. auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad); - if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB, - /*CheckReads=*/false)) + if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB)) continue; // Collect common metadata from all loads in the group. @@ -4350,7 +4365,7 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, static bool canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink, - ScalarEvolution *SE, const Loop *L, + ScalarEvolution &SE, const Loop &L, VPTypeAnalysis &TypeInfo) { auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front()); if (!StoreLoc || !StoreLoc->AATags.Scope) @@ -4363,9 +4378,8 @@ canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink, VPBasicBlock *FirstBB = StoresToSink.front()->getParent(); VPBasicBlock *LastBB = StoresToSink.back()->getParent(); - return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, - /*CheckReads=*/true, &StoresToSinkSet, - StoresToSink[0], SE, L, &TypeInfo); + SinkStoreInfo Info(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo); + return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, Info); } void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, @@ -4383,7 +4397,7 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, return VPDT.properlyDominates(A, B); }); - if (!canSinkStoreWithNoAliasCheck(Group, &SE, L, TypeInfo)) + if (!canSinkStoreWithNoAliasCheck(Group, SE, *L, TypeInfo)) continue; // Use the last (most dominated) store's location for 
the unconditional From dbaf3354c5caa963c06fd91739b7b618e259ad39 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 9 Dec 2025 14:13:45 +0000 Subject: [PATCH 4/5] !fixup address comments, thanks --- .../lib/Transforms/Vectorize/VPlanTransforms.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index b83e08e14cf0b..51ad9a09f7459 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -166,11 +166,12 @@ class SinkStoreInfo { if (!match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance))) return false; - const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); + const DataLayout &DL = SE.getDataLayout(); Type *TyA = TypeInfo.inferScalarType(A->getOperand(0)); uint64_t SizeA = DL.getTypeStoreSize(TyA); Type *TyB = TypeInfo.inferScalarType(B->getOperand(0)); uint64_t SizeB = DL.getTypeStoreSize(TyB); + // Use the maximum store size to ensure no overlap from either direction. // Currently only handles fixed sizes, as it is only used for // replicating VPReplicateRecipes. @@ -193,11 +194,8 @@ class SinkStoreInfo { /// because it's in the exclude set or because no-alias can be proven via /// SCEV. bool shouldSkip(VPRecipeBase &R) const { - if (ExcludeRecipes.contains(&R)) - return true; - if (auto *Store = dyn_cast(&R)) - return isNoAliasViaDistance(Store, &GroupLeader); - return false; + auto *Store = dyn_cast(&R); + return ExcludeRecipes.contains(&R) || (Store && isNoAliasViaDistance(Store, &GroupLeader)); } }; @@ -209,7 +207,7 @@ class SinkStoreInfo { /// leader and other replicate recipes (for store sinking). 
static bool canHoistOrSinkWithNoAliasCheck( const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, - std::optional SinkInfo = std::nullopt) { + std::optional SinkInfo = {}) { bool CheckReads = SinkInfo.has_value(); if (!MemLoc.AATags.Scope) return false; @@ -4378,8 +4376,8 @@ canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink, VPBasicBlock *FirstBB = StoresToSink.front()->getParent(); VPBasicBlock *LastBB = StoresToSink.back()->getParent(); - SinkStoreInfo Info(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo); - return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, Info); + SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo); + return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, SinkInfo); } void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, From 1624327f3563e983e0aa0d33c80ff8646bd8dda9 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Tue, 9 Dec 2025 14:17:43 +0000 Subject: [PATCH 5/5] !fixup fix formatting --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 51ad9a09f7459..852196e589c59 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -194,8 +194,9 @@ class SinkStoreInfo { /// because it's in the exclude set or because no-alias can be proven via /// SCEV. bool shouldSkip(VPRecipeBase &R) const { - auto *Store = dyn_cast(&R); - return ExcludeRecipes.contains(&R) || (Store && isNoAliasViaDistance(Store, &GroupLeader)); + auto *Store = dyn_cast(&R); + return ExcludeRecipes.contains(&R) || + (Store && isNoAliasViaDistance(Store, &GroupLeader)); } }; @@ -205,9 +206,10 @@ class SinkStoreInfo { /// checked (for load hoisting). 
Otherwise recipes that both read and write /// memory are checked, and SCEV is used to prove no-alias between the group /// leader and other replicate recipes (for store sinking). -static bool canHoistOrSinkWithNoAliasCheck( - const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, - std::optional SinkInfo = {}) { +static bool +canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc, + VPBasicBlock *FirstBB, VPBasicBlock *LastBB, + std::optional SinkInfo = {}) { bool CheckReads = SinkInfo.has_value(); if (!MemLoc.AATags.Scope) return false;