Skip to content

Commit 44d23d5

Browse files
committed
[DSE] Remove calls with known writes to dead memory
This is a reapply of a8a51fe, which was reverted in 1ba99e due to a failing compiler-rt test. That test was a false positive because it was checking asan failures not accounting for the fact the call could be validly optimized out. I hopefully managed to stablize that test in 9b955f. (That's a speculative fix due to disk consumption needed to build compiler-rt tests locally being absurd.) Original commit message follows.. The majority of this change is sinking logic from instcombine into MemoryLocation such that it can be generically reused. If we have a call with a single analyzable write to an argument, we can treat that as-if it were a store of unknown size. Merging the code in this was unblocks DSE in the store to dead memory code paths. In theory, it should also enable classic DSE of such calls, but the code appears to not know how to use object sizes to refine unknown access bounds (yet). In addition, this does make the isAllocRemovable path slightly stronger by reusing the libfunc and additional intrinsics bits which are already in getForDest. Differential Revision: https://reviews.llvm.org/D115904
1 parent 9b955f7 commit 44d23d5

File tree

3 files changed

+50
-40
lines changed

3 files changed

+50
-40
lines changed

llvm/lib/Analysis/MemoryLocation.cpp

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,44 @@ MemoryLocation::getForDest(const CallBase *CB, const TargetLibraryInfo &TLI) {
147147
}
148148
}
149149

150-
return None;
150+
if (!CB->onlyAccessesArgMemory())
151+
return None;
152+
153+
if (CB->hasOperandBundles())
154+
// TODO: remove implementation restriction
155+
return None;
156+
157+
Value *UsedV = nullptr;
158+
Optional<unsigned> UsedIdx;
159+
for (unsigned i = 0; i < CB->arg_size(); i++) {
160+
if (!CB->getArgOperand(i)->getType()->isPointerTy())
161+
continue;
162+
if (!CB->doesNotCapture(i))
163+
// capture would allow the address to be read back in an untracked manner
164+
return None;
165+
if (CB->onlyReadsMemory(i))
166+
continue;
167+
if (!UsedV) {
168+
// First potentially writing parameter
169+
UsedV = CB->getArgOperand(i);
170+
UsedIdx = i;
171+
continue;
172+
}
173+
UsedIdx = None;
174+
if (UsedV != CB->getArgOperand(i))
175+
// Can't describe writing to two distinct locations.
176+
// TODO: This results in an inprecision when two values derived from the
177+
// same object are passed as arguments to the same function.
178+
return None;
179+
}
180+
if (!UsedV)
181+
// We don't currently have a way to represent a "does not write" result
182+
// and thus have to be conservative and return unknown.
183+
return None;
184+
185+
if (UsedIdx)
186+
return getForArgument(CB, *UsedIdx, &TLI);
187+
return MemoryLocation::getBeforeOrAfter(UsedV, CB->getAAMetadata());
151188
}
152189

153190
MemoryLocation MemoryLocation::getForArgument(const CallBase *Call,

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2562,35 +2562,24 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo &TLI,
25622562

25632563
/// Given a call CB which uses an address UsedV, return true if we can prove the
25642564
/// call's only possible effect is storing to V.
2565-
static bool isRemovableWrite(CallBase &CB, Value *UsedV) {
2565+
static bool isRemovableWrite(CallBase &CB, Value *UsedV,
2566+
const TargetLibraryInfo &TLI) {
25662567
if (!CB.use_empty())
25672568
// TODO: add recursion if returned attribute is present
25682569
return false;
25692570

2570-
if (!CB.willReturn() || !CB.doesNotThrow() || !CB.onlyAccessesArgMemory() ||
2571-
CB.isTerminator())
2571+
if (CB.isTerminator())
2572+
// TODO: remove implementation restriction
25722573
return false;
25732574

2574-
if (CB.hasOperandBundles())
2575+
if (!CB.willReturn() || !CB.doesNotThrow())
25752576
return false;
25762577

2577-
for (unsigned i = 0; i < CB.arg_size(); i++) {
2578-
if (!CB.getArgOperand(i)->getType()->isPointerTy())
2579-
continue;
2580-
if (!CB.doesNotCapture(i))
2581-
// capture would allow the address to be read back in an untracked manner
2582-
return false;
2583-
if (UsedV != CB.getArgOperand(i) && !CB.onlyReadsMemory(i))
2584-
// A write to another memory location keeps the call live, and thus we
2585-
// must keep the alloca so that the call has somewhere to write to.
2586-
// TODO: This results in an inprecision when two values derived from the
2587-
// same alloca are passed as arguments to the same function.
2588-
return false;
2589-
// Note: Both reads from and writes to the alloca are fine. Since the
2590-
// result is unused nothing can observe the values read from the alloca
2591-
// without writing it to some other observable location (checked above).
2592-
}
2593-
return true;
2578+
// If the only possible side effect of the call is writing to the alloca,
2579+
// and the result isn't used, we can safely remove any reads implied by the
2580+
// call including those which might read the alloca itself.
2581+
Optional<MemoryLocation> Dest = MemoryLocation::getForDest(&CB, TLI);
2582+
return Dest && Dest->Ptr == UsedV;
25942583
}
25952584

25962585
static bool isAllocSiteRemovable(Instruction *AI,
@@ -2660,7 +2649,7 @@ static bool isAllocSiteRemovable(Instruction *AI,
26602649
}
26612650
}
26622651

2663-
if (isRemovableWrite(*cast<CallBase>(I), PI)) {
2652+
if (isRemovableWrite(*cast<CallBase>(I), PI, TLI)) {
26642653
Users.emplace_back(I);
26652654
continue;
26662655
}

llvm/test/Transforms/DeadStoreElimination/trivial-dse-calls.ll

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,6 @@ declare void @f2(i8*, i8*)
1111
; Basic case for DSEing a trivially dead writing call
1212
define void @test_dead() {
1313
; CHECK-LABEL: @test_dead(
14-
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
15-
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
16-
; CHECK-NEXT: call void @f(i8* nocapture writeonly [[BITCAST]]) #[[ATTR1:[0-9]+]]
1714
; CHECK-NEXT: ret void
1815
;
1916
%a = alloca i32, align 4
@@ -28,7 +25,6 @@ define void @test_lifetime() {
2825
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
2926
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
3027
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[BITCAST]])
31-
; CHECK-NEXT: call void @f(i8* nocapture writeonly [[BITCAST]]) #[[ATTR1]]
3228
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[BITCAST]])
3329
; CHECK-NEXT: ret void
3430
;
@@ -48,7 +44,6 @@ define void @test_lifetime2() {
4844
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
4945
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[BITCAST]])
5046
; CHECK-NEXT: call void @unknown()
51-
; CHECK-NEXT: call void @f(i8* nocapture writeonly [[BITCAST]]) #[[ATTR1]]
5247
; CHECK-NEXT: call void @unknown()
5348
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[BITCAST]])
5449
; CHECK-NEXT: ret void
@@ -67,9 +62,6 @@ define void @test_lifetime2() {
6762
; itself since the write will be dropped.
6863
define void @test_dead_readwrite() {
6964
; CHECK-LABEL: @test_dead_readwrite(
70-
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
71-
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
72-
; CHECK-NEXT: call void @f(i8* nocapture [[BITCAST]]) #[[ATTR1]]
7365
; CHECK-NEXT: ret void
7466
;
7567
%a = alloca i32, align 4
@@ -82,7 +74,7 @@ define i32 @test_neg_read_after() {
8274
; CHECK-LABEL: @test_neg_read_after(
8375
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
8476
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
85-
; CHECK-NEXT: call void @f(i8* nocapture writeonly [[BITCAST]]) #[[ATTR1]]
77+
; CHECK-NEXT: call void @f(i8* nocapture writeonly [[BITCAST]]) #[[ATTR1:[0-9]+]]
8678
; CHECK-NEXT: [[RES:%.*]] = load i32, i32* [[A]], align 4
8779
; CHECK-NEXT: ret i32 [[RES]]
8880
;
@@ -203,11 +195,6 @@ define i32 @test_neg_captured_before() {
203195
; Show that reading from unrelated memory is okay
204196
define void @test_unreleated_read() {
205197
; CHECK-LABEL: @test_unreleated_read(
206-
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
207-
; CHECK-NEXT: [[A2:%.*]] = alloca i32, align 4
208-
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
209-
; CHECK-NEXT: [[BITCAST2:%.*]] = bitcast i32* [[A2]] to i8*
210-
; CHECK-NEXT: call void @f2(i8* nocapture writeonly [[BITCAST]], i8* nocapture readonly [[BITCAST2]]) #[[ATTR1]]
211198
; CHECK-NEXT: ret void
212199
;
213200
%a = alloca i32, align 4
@@ -240,9 +227,6 @@ define void @test_neg_unreleated_capture() {
240227
; itself since the write will be dropped.
241228
define void @test_self_read() {
242229
; CHECK-LABEL: @test_self_read(
243-
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
244-
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32* [[A]] to i8*
245-
; CHECK-NEXT: call void @f2(i8* nocapture writeonly [[BITCAST]], i8* nocapture readonly [[BITCAST]]) #[[ATTR1]]
246230
; CHECK-NEXT: ret void
247231
;
248232
%a = alloca i32, align 4

0 commit comments

Comments
 (0)