-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[MemorySSA] Handle MemoryDef optimized away during cloning #117883
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-llvm-analysis Author: Nikita Popov (nikic) ChangesWhen determining the replacement access during cloning, we currently leave accesses for instructions that are not in the VMap alone. This is correct if the instruction is not in VMap because it hasn't been cloned, but not if it has been cloned and then removed. In that case, we should walk up to the defining access, like in other simplification cases. To distinguish the two cases, pass in a callback that queries whether the instruction is part of the cloned region. An alternative to this would be to delay removal of dead instructions in SimpleLoopUnswitch until after MSSA cloning. Fixes #116228. Full diff: https://github.com/llvm/llvm-project/pull/117883.diff 3 Files Affected:
diff --git a/llvm/include/llvm/Analysis/MemorySSAUpdater.h b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
index 055feceefb0546..b925ddd5de12a7 100644
--- a/llvm/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/llvm/include/llvm/Analysis/MemorySSAUpdater.h
@@ -276,7 +276,9 @@ class MemorySSAUpdater {
// representation for. No other cases are supported.
void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap,
+ function_ref<bool(BasicBlock *)> IsInClonedRegion,
bool CloneWasSimplified = false);
+
template <typename Iter>
void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
Iter ValuesBegin, Iter ValuesEnd,
diff --git a/llvm/lib/Analysis/MemorySSAUpdater.cpp b/llvm/lib/Analysis/MemorySSAUpdater.cpp
index f672bd0e1e133e..7226963e9e7d0a 100644
--- a/llvm/lib/Analysis/MemorySSAUpdater.cpp
+++ b/llvm/lib/Analysis/MemorySSAUpdater.cpp
@@ -565,24 +565,26 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
return MA;
}
-static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA,
- const ValueToValueMapTy &VMap,
- PhiToDefMap &MPhiMap,
- MemorySSA *MSSA) {
+static MemoryAccess *getNewDefiningAccessForClone(
+ MemoryAccess *MA, const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap,
+ MemorySSA *MSSA, function_ref<bool(BasicBlock *BB)> IsInClonedRegion) {
MemoryAccess *InsnDefining = MA;
if (MemoryDef *DefMUD = dyn_cast<MemoryDef>(InsnDefining)) {
- if (!MSSA->isLiveOnEntryDef(DefMUD)) {
- Instruction *DefMUDI = DefMUD->getMemoryInst();
- assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
- if (Instruction *NewDefMUDI =
- cast_or_null<Instruction>(VMap.lookup(DefMUDI))) {
- InsnDefining = MSSA->getMemoryAccess(NewDefMUDI);
- if (!InsnDefining || isa<MemoryUse>(InsnDefining)) {
- // The clone was simplified, it's no longer a MemoryDef, look up.
- InsnDefining = getNewDefiningAccessForClone(
- DefMUD->getDefiningAccess(), VMap, MPhiMap, MSSA);
- }
- }
+ if (MSSA->isLiveOnEntryDef(DefMUD))
+ return DefMUD;
+
+ // If the MemoryDef is not part of the cloned region, leave it alone.
+ Instruction *DefMUDI = DefMUD->getMemoryInst();
+ assert(DefMUDI && "Found MemoryUseOrDef with no Instruction.");
+ if (!IsInClonedRegion(DefMUDI->getParent()))
+ return DefMUD;
+
+ auto *NewDefMUDI = cast_or_null<Instruction>(VMap.lookup(DefMUDI));
+ InsnDefining = NewDefMUDI ? MSSA->getMemoryAccess(NewDefMUDI) : nullptr;
+ if (!InsnDefining || isa<MemoryUse>(InsnDefining)) {
+ // The clone was simplified, it's no longer a MemoryDef, look up.
+ InsnDefining = getNewDefiningAccessForClone(
+ DefMUD->getDefiningAccess(), VMap, MPhiMap, MSSA, IsInClonedRegion);
}
} else {
MemoryPhi *DefPhi = cast<MemoryPhi>(InsnDefining);
@@ -593,10 +595,10 @@ static MemoryAccess *getNewDefiningAccessForClone(MemoryAccess *MA,
return InsnDefining;
}
-void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
- const ValueToValueMapTy &VMap,
- PhiToDefMap &MPhiMap,
- bool CloneWasSimplified) {
+void MemorySSAUpdater::cloneUsesAndDefs(
+ BasicBlock *BB, BasicBlock *NewBB, const ValueToValueMapTy &VMap,
+ PhiToDefMap &MPhiMap, function_ref<bool(BasicBlock *)> IsInClonedRegion,
+ bool CloneWasSimplified) {
const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
if (!Acc)
return;
@@ -615,7 +617,7 @@ void MemorySSAUpdater::cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
MemoryAccess *NewUseOrDef = MSSA->createDefinedAccess(
NewInsn,
getNewDefiningAccessForClone(MUD->getDefiningAccess(), VMap,
- MPhiMap, MSSA),
+ MPhiMap, MSSA, IsInClonedRegion),
/*Template=*/CloneWasSimplified ? nullptr : MUD,
/*CreationMustSucceed=*/false);
if (NewUseOrDef)
@@ -668,8 +670,13 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
ArrayRef<BasicBlock *> ExitBlocks,
const ValueToValueMapTy &VMap,
bool IgnoreIncomingWithNoClones) {
- PhiToDefMap MPhiMap;
+ SmallSetVector<BasicBlock *, 16> Blocks;
+ for (BasicBlock *BB : concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ Blocks.insert(BB);
+ auto IsInClonedRegion = [&](BasicBlock *BB) { return Blocks.contains(BB); };
+
+ PhiToDefMap MPhiMap;
auto FixPhiIncomingValues = [&](MemoryPhi *Phi, MemoryPhi *NewPhi) {
assert(Phi && NewPhi && "Invalid Phi nodes.");
BasicBlock *NewPhiBB = NewPhi->getBlock();
@@ -692,9 +699,10 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
continue;
// Determine incoming value and add it as incoming from IncBB.
- NewPhi->addIncoming(
- getNewDefiningAccessForClone(IncomingAccess, VMap, MPhiMap, MSSA),
- IncBB);
+ NewPhi->addIncoming(getNewDefiningAccessForClone(IncomingAccess, VMap,
+ MPhiMap, MSSA,
+ IsInClonedRegion),
+ IncBB);
}
if (auto *SingleAccess = onlySingleValue(NewPhi)) {
MPhiMap[Phi] = SingleAccess;
@@ -716,13 +724,13 @@ void MemorySSAUpdater::updateForClonedLoop(const LoopBlocksRPO &LoopBlocks,
MPhiMap[MPhi] = NewPhi;
}
// Update Uses and Defs.
- cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap);
+ cloneUsesAndDefs(BB, NewBlock, VMap, MPhiMap, IsInClonedRegion);
};
- for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ for (auto *BB : Blocks)
ProcessBlock(BB);
- for (auto *BB : llvm::concat<BasicBlock *const>(LoopBlocks, ExitBlocks))
+ for (auto *BB : Blocks)
if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
if (MemoryAccess *NewPhi = MPhiMap.lookup(MPhi))
FixPhiIncomingValues(MPhi, cast<MemoryPhi>(NewPhi));
@@ -741,7 +749,9 @@ void MemorySSAUpdater::updateForClonedBlockIntoPred(
PhiToDefMap MPhiMap;
if (MemoryPhi *MPhi = MSSA->getMemoryAccess(BB))
MPhiMap[MPhi] = MPhi->getIncomingValueForBlock(P1);
- cloneUsesAndDefs(BB, P1, VM, MPhiMap, /*CloneWasSimplified=*/true);
+ cloneUsesAndDefs(
+ BB, P1, VM, MPhiMap, [&](BasicBlock *BB) { return BB == P1; },
+ /*CloneWasSimplified=*/true);
}
template <typename Iter>
diff --git a/llvm/test/Analysis/MemorySSA/pr116227.ll b/llvm/test/Analysis/MemorySSA/pr116227.ll
new file mode 100644
index 00000000000000..cd9686b018aa62
--- /dev/null
+++ b/llvm/test/Analysis/MemorySSA/pr116227.ll
@@ -0,0 +1,61 @@
+; RUN: opt -disable-output -passes="loop-mssa(simple-loop-unswitch<nontrivial>),print<memoryssa>" -verify-memoryssa < %s 2>&1 | FileCheck %s
+
+declare ptr @malloc() allockind("alloc,uninitialized")
+
+; CHECK-LABEL: MemorySSA for function: test
+
+; CHECK: for.body.us:
+; CHECK-NEXT: 3 = MemoryPhi({entry.split.us,liveOnEntry},{for.body.us,3})
+
+; CHECK: for.body:
+; CHECK-NEXT: 2 = MemoryPhi({entry.split,liveOnEntry},{for.body,1})
+; CHECK-NEXT: 1 = MemoryDef(2)
+; CHECK-NEXT: %call.i = call ptr @malloc()
+
+define void @test(i1 %arg) {
+entry:
+ br label %for.body
+
+for.body:
+ %call.i = call ptr @malloc()
+ %cmp.i = icmp ne ptr %call.i, null
+ %or.cond.i = select i1 %cmp.i, i1 %arg, i1 false
+ br i1 %or.cond.i, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; CHECK-LABEL: MemorySSA for function: test_extra_defs
+
+; CHECK: entry:
+; CHECK-NEXT: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 1, ptr %p, align 1
+
+; CHECK: for.body.us:
+; CHECK-NEXT: 5 = MemoryPhi({entry.split.us,1},{for.body.us,6})
+; CHECK-NEXT: 6 = MemoryDef(5)
+; CHECK-NEXT: store i8 2, ptr %p, align 1
+
+; CHECK: for.body:
+; CHECK-NEXT: 4 = MemoryPhi({entry.split,1},{for.body,3})
+; CHECK-NEXT: 2 = MemoryDef(4)
+; CHECK-NEXT: store i8 2, ptr %p
+; CHECK-NEXT: 3 = MemoryDef(2)
+; CHECK-NEXT: %call.i = call ptr @malloc()
+
+define void @test_extra_defs(ptr %p, i1 %arg) {
+entry:
+ store i8 1, ptr %p
+ br label %for.body
+
+for.body:
+ store i8 2, ptr %p
+ %call.i = call ptr @malloc()
+ %cmp.i = icmp ne ptr %call.i, null
+ %or.cond.i = select i1 %cmp.i, i1 %arg, i1 false
+ br i1 %or.cond.i, label %exit, label %for.body
+
+exit:
+ ret void
+}
|
|
I'm seeing some assertion failures on llvm-test-suite... |
42ead24 to
3d9cf6b
Compare
|
Okay, that was just me mixing up two blocks... Fixed and added a test to guard against this. |
|
ping |
fhahn
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, making getNewDefiningAccessForClone safe seems preferable to working around the issue locally IMO.
| // into an instruction that MemorySSA has no representation for, or (2) turned | ||
| // a MemoryDef into a MemoryUse or an instruction that MemorySSA has no | ||
| // representation for. No other cases are supported. | ||
| void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would probably b good to document the new argument?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Also converted the whole comment into doxygen format.
When determining the replacement access during cloning, we currently leave accesses for instructions that are not in the VMap alone. This is correct if the instruction is not in VMap because it hasn't been cloned, but not if it has been cloned and then removed. In that case, we should walk up to the defining access, like in other simplification cases. To distinguish the two cases, pass in a callback that queries where the instruction is part of the cloned region. An alternative to this would be to delay removal of dead instructions in SimpleLoopUnswitch until after MSSA cloning.
3d9cf6b to
e77e72c
Compare
alinas
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you for resolving this.
| return DefMUD; | ||
|
|
||
| auto *NewDefMUDI = cast_or_null<Instruction>(VMap.lookup(DefMUDI)); | ||
| InsnDefining = NewDefMUDI ? MSSA->getMemoryAccess(NewDefMUDI) : nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the old implementation this should be:
InsnDefining = NewDefMUDI ? MSSA->getMemoryAccess(NewDefMUDI) : MA;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hm, why? The intention here is that if there is no NewDefMUDI we will go to the recursive getNewDefiningAccessForClone call.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That is, this is exactly the behavior I'm changing. Instead of referencing the old MA (which is from the original, non-cloned region), we should look upwards for the new def.
When determining the replacement access during cloning, we currently leave accesses for instructions that are not in the VMap alone. This is correct if the instruction is not in VMap because it hasn't been cloned, but not if it has been cloned and then removed. In that case, we should walk up to the defining access, like in other simplification cases.
To distinguish the two cases, pass in a callback that queries whether the instruction is part of the cloned region.
An alternative to this would be to delay removal of dead instructions in SimpleLoopUnswitch until after MSSA cloning.
Fixes #116228.