diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index eb078c783d5f7..852196e589c59 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -44,6 +44,7 @@
 
 using namespace llvm;
 using namespace VPlanPatternMatch;
+using namespace SCEVPatternMatch;
 
 bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
     VPlan &Plan,
@@ -139,14 +140,77 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
   return true;
 }
 
-// Check if a memory operation doesn't alias with memory operations in blocks
-// between FirstBB and LastBB using scoped noalias metadata.
-// For load hoisting, we only check writes in one direction.
-// For store sinking, we check both reads and writes bidirectionally.
-static bool canHoistOrSinkWithNoAliasCheck(
-    const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
-    bool CheckReads,
-    const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr) {
+/// Helper for extra no-alias checks via an exclude set of recipes and SCEV.
+class SinkStoreInfo {
+  const SmallPtrSetImpl<VPRecipeBase *> &ExcludeRecipes;
+  VPReplicateRecipe &GroupLeader;
+  ScalarEvolution &SE;
+  const Loop &L;
+  VPTypeAnalysis &TypeInfo;
+
+  /// Return true if \p A and \p B are known to not alias for all VFs in the
+  /// plan, checked via the constant SCEV distance between their addresses.
+  bool isNoAliasViaDistance(VPReplicateRecipe *A, VPReplicateRecipe *B) const {
+    if (A->getOpcode() != Instruction::Store ||
+        B->getOpcode() != Instruction::Store)
+      return false;
+
+    VPValue *AddrA = A->getOperand(1);
+    const SCEV *SCEVA = vputils::getSCEVExprForVPValue(AddrA, SE, &L);
+    VPValue *AddrB = B->getOperand(1);
+    const SCEV *SCEVB = vputils::getSCEVExprForVPValue(AddrB, SE, &L);
+    if (isa<SCEVCouldNotCompute>(SCEVA) || isa<SCEVCouldNotCompute>(SCEVB))
+      return false;
+
+    const APInt *Distance;
+    if (!match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance)))
+      return false;
+
+    const DataLayout &DL = SE.getDataLayout();
+    Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
+    uint64_t SizeA = DL.getTypeStoreSize(TyA);
+    Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
+    uint64_t SizeB = DL.getTypeStoreSize(TyB);
+
+    // Use the maximum store size to ensure no overlap from either direction.
+    // Currently only handles fixed sizes, as it is only used for
+    // replicating VPReplicateRecipes.
+    uint64_t MaxStoreSize = std::max(SizeA, SizeB);
+
+    auto VFs = B->getParent()->getPlan()->vectorFactors();
+    ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT);
+    return Distance->abs().uge(
+        MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue());
+  }
+
+public:
+  SinkStoreInfo(const SmallPtrSetImpl<VPRecipeBase *> &ExcludeRecipes,
+                VPReplicateRecipe &GroupLeader, ScalarEvolution &SE,
+                const Loop &L, VPTypeAnalysis &TypeInfo)
+      : ExcludeRecipes(ExcludeRecipes), GroupLeader(GroupLeader), SE(SE), L(L),
+        TypeInfo(TypeInfo) {}
+
+  /// Return true if \p R should be skipped during alias checking, either
+  /// because it's in the exclude set or because no-alias can be proven via
+  /// SCEV.
+  bool shouldSkip(VPRecipeBase &R) const {
+    auto *Store = dyn_cast<VPReplicateRecipe>(&R);
+    return ExcludeRecipes.contains(&R) ||
+           (Store && isNoAliasViaDistance(Store, &GroupLeader));
+  }
+};
+
+/// Check if a memory operation doesn't alias with memory operations in blocks
+/// between \p FirstBB and \p LastBB using scoped noalias metadata. If
+/// \p SinkInfo is std::nullopt, only recipes that may write to memory are
+/// checked (for load hoisting). Otherwise recipes that read or write memory
+/// are checked, and SCEV is used to prove no-alias between the group
+/// leader and other replicate recipes (for store sinking).
+static bool
+canHoistOrSinkWithNoAliasCheck(const MemoryLocation &MemLoc,
+                               VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
+                               std::optional<SinkStoreInfo> SinkInfo = {}) {
+  bool CheckReads = SinkInfo.has_value();
   if (!MemLoc.AATags.Scope)
     return false;
 
@@ -158,7 +222,7 @@ static bool canHoistOrSinkWithNoAliasCheck(
            "Expected at most one successor in block chain");
     auto *VPBB = cast<VPBasicBlock>(Block);
     for (VPRecipeBase &R : *VPBB) {
-      if (ExcludeRecipes && ExcludeRecipes->contains(&R))
+      if (SinkInfo && SinkInfo->shouldSkip(R))
         continue;
 
       // Skip recipes that don't need checking.
@@ -4273,8 +4337,7 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
 
     // Check that the load doesn't alias with stores between first and last.
     auto LoadLoc = vputils::getMemoryLocation(*EarliestLoad);
-    if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB,
-                                                    /*CheckReads=*/false))
+    if (!LoadLoc || !canHoistOrSinkWithNoAliasCheck(*LoadLoc, FirstBB, LastBB))
       continue;
 
     // Collect common metadata from all loads in the group.
@@ -4301,7 +4364,9 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE, } static bool -canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink) { +canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink, + ScalarEvolution &SE, const Loop &L, + VPTypeAnalysis &TypeInfo) { auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front()); if (!StoreLoc || !StoreLoc->AATags.Scope) return false; @@ -4313,8 +4378,8 @@ canSinkStoreWithNoAliasCheck(ArrayRef StoresToSink) { VPBasicBlock *FirstBB = StoresToSink.front()->getParent(); VPBasicBlock *LastBB = StoresToSink.back()->getParent(); - return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, - /*CheckReads=*/true, &StoresToSinkSet); + SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo); + return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, SinkInfo); } void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, @@ -4325,13 +4390,14 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE, return; VPDominatorTree VPDT(Plan); + VPTypeAnalysis TypeInfo(Plan); for (auto &Group : Groups) { sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) { return VPDT.properlyDominates(A, B); }); - if (!canSinkStoreWithNoAliasCheck(Group)) + if (!canSinkStoreWithNoAliasCheck(Group, SE, *L, TypeInfo)) continue; // Use the last (most dominated) store's location for the unconditional diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll index cdbe9bb555834..7450fcccbb484 100644 --- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll @@ -764,7 +764,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: br label 
%[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ] +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ] ; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16 ; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16 @@ -781,42 +781,30 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) { ; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP22]], align 8, !alias.scope [[META78]] ; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1 -; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true) ; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00) -; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0 -; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] -; CHECK: [[PRED_STORE_IF]]: ; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP34]], i32 0 -; CHECK-NEXT: store double [[TMP19]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] +; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0 +; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1 +; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[WIDE_LOAD]], <2 x double> [[TMP34]] +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP20]], i32 0 +; CHECK-NEXT: store double [[TMP32]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]] +; 
CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP20]], i32 1 +; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 +; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]] +; CHECK: [[PRED_STORE_IF]]: +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP18]], i64 16 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP24]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]] ; CHECK: [[PRED_STORE_CONTINUE]]: -; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1 -; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]] +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 +; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_IF2]]: -; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP34]], i32 1 -; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]] +; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP21]], i64 16 +; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP35]], align 8, !alias.scope [[META81]], !noalias [[META78]] ; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]] ; CHECK: [[PRED_STORE_CONTINUE3]]: -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0 -; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]] -; CHECK: [[PRED_STORE_IF4]]: -; CHECK-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]] -; CHECK-NEXT: store double [[TMP13]], ptr [[TMP31]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP31]], i64 16 -; 
CHECK-NEXT: store double 1.000000e+01, ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]] -; CHECK: [[PRED_STORE_CONTINUE5]]: -; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1 -; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]] -; CHECK: [[PRED_STORE_IF6]]: -; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]] -; CHECK-NEXT: store double [[TMP14]], ptr [[TMP32]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16 -; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META81]], !noalias [[META78]] -; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]] -; CHECK: [[PRED_STORE_CONTINUE7]]: ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2 ; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP52]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP83:![0-9]+]]