-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[VPlan] Use SCEV to prove non-aliasing for stores at different offsets. #170347
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesExtend the logic add in #168771 to also allow sinking stores past stores in the same noalias set by checking if we can prove no-alias via the distance between accesses, checked via SCEV. Full diff: https://github.com/llvm/llvm-project/pull/170347.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 38024aa6897fc..bccc48ba33223 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -139,6 +139,43 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
return true;
}
+// Return true if \p A and \p B are known to not alias for all VFs in the plan,
+// checked via the distance between the accesses
+static bool isNoAliasViaDistance(VPReplicateRecipe *A, VPReplicateRecipe *B,
+ ScalarEvolution &SE, const Loop &L,
+ VPTypeAnalysis &TypeInfo) {
+ if (A->getOpcode() != Instruction::Store ||
+ B->getOpcode() != Instruction::Store)
+ return false;
+
+ VPValue *AddrA = A->getOperand(1);
+ const SCEV *SCEVA = vputils::getSCEVExprForVPValue(AddrA, SE, &L);
+ VPValue *AddrB = B->getOperand(1);
+ const SCEV *SCEVB = vputils::getSCEVExprForVPValue(AddrB, SE, &L);
+ if (isa<SCEVCouldNotCompute>(SCEVA) || isa<SCEVCouldNotCompute>(SCEVB))
+ return false;
+
+ const SCEV *Distance = SE.getMinusSCEV(SCEVA, SCEVB);
+ auto *ConstDist = dyn_cast<SCEVConstant>(Distance);
+ if (!ConstDist)
+ return false;
+
+ const DataLayout &DL = L.getHeader()->getModule()->getDataLayout();
+ Type *TyA = TypeInfo.inferScalarType(A->getOperand(0));
+ TypeSize SizeA = DL.getTypeStoreSize(TyA);
+ Type *TyB = TypeInfo.inferScalarType(B->getOperand(0));
+ TypeSize SizeB = DL.getTypeStoreSize(TyB);
+
+ // Use the maximum store size to ensure no overlap from either direction.
+ uint64_t MaxStoreSize =
+ std::max(SizeA.getFixedValue(), SizeB.getFixedValue());
+ const APInt &DistValue = ConstDist->getAPInt();
+ auto VFs = B->getParent()->getPlan()->vectorFactors();
+ ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT);
+ return DistValue.abs().uge(
+ MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue());
+}
+
// Check if a memory operation doesn't alias with memory operations in blocks
// between FirstBB and LastBB using scoped noalias metadata.
// For load hoisting, we only check writes in one direction.
@@ -146,7 +183,10 @@ bool VPlanTransforms::tryToConvertVPInstructionsToVPRecipes(
static bool canHoistOrSinkWithNoAliasCheck(
const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB,
bool CheckReads,
- const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr) {
+ const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr,
+ ScalarEvolution *SE = nullptr, const Loop *L = nullptr,
+ VPTypeAnalysis *TypeInfo = nullptr,
+ ArrayRef<VPReplicateRecipe *> MemOpsInGroup = {}) {
if (!MemLoc.AATags.Scope)
return false;
@@ -165,6 +205,13 @@ static bool canHoistOrSinkWithNoAliasCheck(
if (!R.mayWriteToMemory() && !(CheckReads && R.mayReadFromMemory()))
continue;
+ // For stores, check if we can use SCEV to prove no-alias.
+ if (auto *Store = dyn_cast<VPReplicateRecipe>(&R)) {
+ if (SE && L && TypeInfo && !MemOpsInGroup.empty() &&
+ isNoAliasViaDistance(Store, MemOpsInGroup[0], *SE, *L, *TypeInfo))
+ continue;
+ }
+
auto Loc = vputils::getMemoryLocation(R);
if (!Loc)
// Conservatively assume aliasing for memory operations without
@@ -4296,7 +4343,9 @@ void VPlanTransforms::hoistPredicatedLoads(VPlan &Plan, ScalarEvolution &SE,
}
static bool
-canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
+canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink,
+ ScalarEvolution *SE, const Loop *L,
+ VPTypeAnalysis &TypeInfo) {
auto StoreLoc = vputils::getMemoryLocation(*StoresToSink.front());
if (!StoreLoc || !StoreLoc->AATags.Scope)
return false;
@@ -4309,7 +4358,8 @@ canSinkStoreWithNoAliasCheck(ArrayRef<VPReplicateRecipe *> StoresToSink) {
VPBasicBlock *FirstBB = StoresToSink.front()->getParent();
VPBasicBlock *LastBB = StoresToSink.back()->getParent();
return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB,
- /*CheckReads=*/true, &StoresToSinkSet);
+ /*CheckReads=*/true, &StoresToSinkSet,
+ SE, L, &TypeInfo, StoresToSink);
}
void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
@@ -4320,13 +4370,14 @@ void VPlanTransforms::sinkPredicatedStores(VPlan &Plan, ScalarEvolution &SE,
return;
VPDominatorTree VPDT(Plan);
+ VPTypeAnalysis TypeInfo(Plan);
for (auto &Group : Groups) {
sort(Group, [&VPDT](VPReplicateRecipe *A, VPReplicateRecipe *B) {
return VPDT.properlyDominates(A, B);
});
- if (!canSinkStoreWithNoAliasCheck(Group))
+ if (!canSinkStoreWithNoAliasCheck(Group, &SE, L, TypeInfo))
continue;
// Use the last (most dominated) store's location for the unconditional
diff --git a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
index cdbe9bb555834..7450fcccbb484 100644
--- a/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
+++ b/llvm/test/Transforms/LoopVectorize/hoist-predicated-loads-with-predicated-stores.ll
@@ -764,7 +764,7 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE7:.*]] ]
+; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE3:.*]] ]
; CHECK-NEXT: [[INDEX:%.*]] = mul i64 [[INDEX1]], 16
; CHECK-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 16
@@ -781,42 +781,30 @@ define void @sink_multiple_store_groups_noalias_via_scev(ptr %dst, ptr %src) {
; CHECK-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP22]], align 8, !alias.scope [[META78]]
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x double> poison, double [[TMP13]], i32 0
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = insertelement <2 x double> [[TMP15]], double [[TMP14]], i32 1
-; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i1> [[TMP10]], splat (i1 true)
; CHECK-NEXT: [[TMP34:%.*]] = fadd <2 x double> [[WIDE_LOAD]], splat (double 8.000000e+00)
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[TMP16]], i32 0
-; CHECK-NEXT: br i1 [[TMP24]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
-; CHECK: [[PRED_STORE_IF]]:
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[TMP34]], i32 0
-; CHECK-NEXT: store double [[TMP19]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP18]], i32 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> [[TMP31]], ptr [[TMP21]], i32 1
+; CHECK-NEXT: [[TMP20:%.*]] = select <2 x i1> [[TMP10]], <2 x double> [[WIDE_LOAD]], <2 x double> [[TMP34]]
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[TMP20]], i32 0
+; CHECK-NEXT: store double [[TMP32]], ptr [[TMP18]], align 8, !alias.scope [[META81:![0-9]+]], !noalias [[META78]]
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP20]], i32 1
+; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]]
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
+; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; CHECK: [[PRED_STORE_IF]]:
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[TMP18]], i64 16
+; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP24]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
; CHECK: [[PRED_STORE_CONTINUE]]:
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x i1> [[TMP16]], i32 1
-; CHECK-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3:.*]]
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
+; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF2:.*]], label %[[PRED_STORE_CONTINUE3]]
; CHECK: [[PRED_STORE_IF2]]:
-; CHECK-NEXT: [[TMP21:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[TMP34]], i32 1
-; CHECK-NEXT: store double [[TMP33]], ptr [[TMP21]], align 8, !alias.scope [[META81]], !noalias [[META78]]
+; CHECK-NEXT: [[TMP35:%.*]] = getelementptr i8, ptr [[TMP21]], i64 16
+; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP35]], align 8, !alias.scope [[META81]], !noalias [[META78]]
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE3]]
; CHECK: [[PRED_STORE_CONTINUE3]]:
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP10]], i32 0
-; CHECK-NEXT: br i1 [[TMP23]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5:.*]]
-; CHECK: [[PRED_STORE_IF4]]:
-; CHECK-NEXT: [[TMP31:%.*]] = getelementptr double, ptr [[DST]], i64 [[IV]]
-; CHECK-NEXT: store double [[TMP13]], ptr [[TMP31]], align 8, !alias.scope [[META81]], !noalias [[META78]]
-; CHECK-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP31]], i64 16
-; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP37]], align 8, !alias.scope [[META81]], !noalias [[META78]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE5]]
-; CHECK: [[PRED_STORE_CONTINUE5]]:
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP10]], i32 1
-; CHECK-NEXT: br i1 [[TMP25]], label %[[PRED_STORE_IF6:.*]], label %[[PRED_STORE_CONTINUE7]]
-; CHECK: [[PRED_STORE_IF6]]:
-; CHECK-NEXT: [[TMP32:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
-; CHECK-NEXT: store double [[TMP14]], ptr [[TMP32]], align 8, !alias.scope [[META81]], !noalias [[META78]]
-; CHECK-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP32]], i64 16
-; CHECK-NEXT: store double 1.000000e+01, ptr [[TMP47]], align 8, !alias.scope [[META81]], !noalias [[META78]]
-; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE7]]
-; CHECK: [[PRED_STORE_CONTINUE7]]:
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], 2
; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
; CHECK-NEXT: br i1 [[TMP52]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP83:![0-9]+]]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I feel like we're re-inventing parts of the various AAs, which is unavoidable, but I wonder if we can factor this out into something like VPAliasAnalysis at some point?
| auto VFs = B->getParent()->getPlan()->vectorFactors(); | ||
| ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT); | ||
| return DistValue.abs().uge( | ||
| MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will getFixedValue not assert here, as MaxVF could be a scalable? Also, is it possible to pass BestVF or hasn't that been decided yet?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not at the moment, we should never have replicate recipes for scalable plans
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I find this fact very non-trivial: could you kindly add a comment saying this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yep, added thanks
| auto VFs = B->getParent()->getPlan()->vectorFactors(); | ||
| ElementCount MaxVF = *max_element(VFs, ElementCount::isKnownLT); | ||
| return DistValue.abs().uge( | ||
| MaxVF.multiplyCoefficientBy(MaxStoreSize).getFixedValue()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I find this fact very non-trivial: could you kindly add a comment saying this?
| // leader (all members of the group write to the same address with the | ||
| // same size). | ||
| if (auto *Store = dyn_cast<VPReplicateRecipe>(&R)) { | ||
| if (GroupLeader && |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't we reorder the arguments so that arguments after GroupLeader can be nullptr in the API? Better to have GroupLeader last, so all the others are guaranteed to be non-nullptr?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure re-ordering makes things safer, as they are still all default arguments and it still relies on the caller either passing nullptr for all or non of them. I added a dedicated helper class to encapsulate the extra functionality, to make this safer.
🐧 Linux x64 Test Results
✅ The build succeeded and all tests passed. |
ecdb93b to
ed84aeb
Compare
artagnon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SinkStoreInfo is a good improvement! LGTM, thanks!
| const MemoryLocation &MemLoc, VPBasicBlock *FirstBB, VPBasicBlock *LastBB, | ||
| bool CheckReads, | ||
| const SmallPtrSetImpl<VPRecipeBase *> *ExcludeRecipes = nullptr) { | ||
| std::optional<SinkStoreInfo> SinkInfo = std::nullopt) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| std::optional<SinkStoreInfo> SinkInfo = std::nullopt) { | |
| std::optional<SinkStoreInfo> SinkInfo = {}) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
| if (ExcludeRecipes.contains(&R)) | ||
| return true; | ||
| if (auto *Store = dyn_cast<VPReplicateRecipe>(&R)) | ||
| return isNoAliasViaDistance(Store, &GroupLeader); | ||
| return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| if (ExcludeRecipes.contains(&R)) | |
| return true; | |
| if (auto *Store = dyn_cast<VPReplicateRecipe>(&R)) | |
| return isNoAliasViaDistance(Store, &GroupLeader); | |
| return false; | |
| auto *Store = dyn_cast<VPReplicateRecipe>(&R); | |
| return ExcludeRecipes.contains(&R) || (Store && isNoAliasViaDistance(Store, &GroupLeader)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done thanks
| uint64_t SizeA = DL.getTypeStoreSize(TyA); | ||
| Type *TyB = TypeInfo.inferScalarType(B->getOperand(0)); | ||
| uint64_t SizeB = DL.getTypeStoreSize(TyB); | ||
| // Use the maximum store size to ensure no overlap from either direction. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // Use the maximum store size to ensure no overlap from either direction. | |
| // Use the maximum store size to ensure no overlap from either direction. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
| VPBasicBlock *LastBB = StoresToSink.back()->getParent(); | ||
| return canHoistOrSinkWithNoAliasCheck(*StoreLoc, FirstBB, LastBB, | ||
| /*CheckReads=*/true, &StoresToSinkSet); | ||
| SinkStoreInfo Info(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| SinkStoreInfo Info(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo); | |
| SinkStoreInfo SinkInfo(StoresToSinkSet, *StoresToSink[0], SE, L, TypeInfo); |
to clearly distinguish from TypeInfo?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done thanks
| if (!match(SE.getMinusSCEV(SCEVA, SCEVB), m_scev_APInt(Distance))) | ||
| return false; | ||
|
|
||
| const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); | |
| const DataLayout &DL = SE.getDataLayout(); |
Extend the logic add in llvm#168771 to also allow sinking stores past stores in the same noalias set by checking if we can prove no-alias via the distance between accesses, checked via SCEV.
c6bd47a to
dbaf335
Compare
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
…rent offsets. (#170347) Extend the logic add in llvm/llvm-project#168771 to also allow sinking stores past stores in the same noalias set by checking if we can prove no-alias via the distance between accesses, checked via SCEV. PR: llvm/llvm-project#170347
Extend the logic add in #168771 to also allow sinking stores past stores in the same noalias set by checking if we can prove no-alias via the distance between accesses, checked via SCEV.