Skip to content

Commit 30bb5be

Browse files
committed
[CSSPGO] Unblock optimizations with pseudo probe instrumentation part 2.
As a follow-up to D95982, this patch continues unblocking optimizations that are blocked by pseudo probe instrumentation. The optimizations unblocked are: - In-block load propagation. - In-block dead store elimination. - Memory copy optimization that turns stores to consecutive memory locations into a memset. These optimizations are local to a block, so they shouldn't affect the profile quality. Reviewed By: wmi Differential Revision: https://reviews.llvm.org/D100075
1 parent 18839be commit 30bb5be

File tree

6 files changed

+69
-8
lines changed

6 files changed

+69
-8
lines changed

llvm/lib/Analysis/Loads.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ Value *llvm::findAvailablePtrLoadStore(
532532
// We must ignore debug info directives when counting (otherwise they
533533
// would affect codegen).
534534
Instruction *Inst = &*--ScanFrom;
535-
if (isa<DbgInfoIntrinsic>(Inst))
535+
if (Inst->isDebugOrPseudoInst())
536536
continue;
537537

538538
// Restore ScanFrom to expected value in case next test succeeds
@@ -620,7 +620,7 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, AAResults &AA,
620620
SmallVector<Instruction *> MustNotAliasInsts;
621621
for (Instruction &Inst : make_range(++Load->getReverseIterator(),
622622
ScanBB->rend())) {
623-
if (isa<DbgInfoIntrinsic>(&Inst))
623+
if (Inst.isDebugOrPseudoInst())
624624
continue;
625625

626626
if (MaxInstsToScan-- == 0)

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1396,7 +1396,7 @@ Instruction *InstCombinerImpl::visitStoreInst(StoreInst &SI) {
13961396
--BBI;
13971397
// Don't count debug info directives, lest they affect codegen,
13981398
// and we skip pointer-to-pointer bitcasts, which are NOPs.
1399-
if (isa<DbgInfoIntrinsic>(BBI) ||
1399+
if (BBI->isDebugOrPseudoInst() ||
14001400
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
14011401
ScanInsts++;
14021402
continue;

llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,13 @@ Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
399399
}
400400
}
401401

402+
// Calls that only access inaccessible memory do not block merging
403+
// accessible stores.
404+
if (auto *CB = dyn_cast<CallBase>(BI)) {
405+
if (CB->onlyAccessesInaccessibleMemory())
406+
continue;
407+
}
408+
402409
if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
403410
// If the instruction is readnone, ignore it, otherwise bail out. We
404411
// don't even allow readonly here because we don't want something like:

llvm/lib/Transforms/Scalar/Sink.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ static bool ProcessBlock(BasicBlock &BB, DominatorTree &DT, LoopInfo &LI,
202202
if (!ProcessedBegin)
203203
--I;
204204

205-
if (isa<DbgInfoIntrinsic>(Inst))
205+
if (Inst->isDebugOrPseudoInst())
206206
continue;
207207

208208
if (SinkInstruction(Inst, Stores, DT, LI, AA)) {

llvm/test/Transforms/SampleProfile/pseudo-probe-instcombine.ll

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt -passes=instcombine -S < %s | FileCheck %s
1+
; RUN: opt -passes=instcombine -available-load-scan-limit=2 -S < %s | FileCheck %s
22

33
%struct.nonbonded = type { [2 x %struct.CompAtom*], [2 x %struct.CompAtomExt*], [2 x %struct.CompAtom*], [2 x %class.Vector*], [2 x %class.Vector*], [2 x i32], %class.Vector, double*, double*, %class.ComputeNonbondedWorkArrays*, %class.Pairlists*, i32, i32, double, double, i32, i32, i32, i32 }
44
%struct.CompAtomExt = type { i32 }
@@ -13,11 +13,11 @@
1313
%class.ResizeArrayRaw.3 = type <{ %class.Vector*, i8*, i32, i32, i32, float, i32, [4 x i8] }>
1414
%class.Pairlists = type { i16*, i32, i32 }
1515

16+
define dso_local void @merge(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
1617
;; Check the minPart4 and minPart assignments are merged.
18+
; CHECK-LABEL: @merge(
1719
; CHECK-COUNT-1: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
1820
; CHECK-NOT: getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 16
19-
20-
define dso_local void @_ZN20ComputeNonbondedUtil9calc_pairEP9nonbonded(%struct.nonbonded* nocapture readonly %params) local_unnamed_addr align 2 {
2121
entry:
2222
%savePairlists3 = getelementptr inbounds %struct.nonbonded, %struct.nonbonded* %params, i64 0, i32 11
2323
%0 = load i32, i32* %savePairlists3, align 8
@@ -58,7 +58,36 @@ if.else147: ; preds = %if.then138
5858
ret void
5959
}
6060

61-
declare dso_local void @_ZN9Pairlists8addIndexEv() align 2
61+
define i32 @load(i32* nocapture %a, i32* nocapture %b) {
62+
;; Check the last load is deleted (the store is kept and no load remains after the pseudo probe).
63+
; CHECK-LABEL: @load(
64+
; CHECK-NEXT: %1 = getelementptr inbounds i32, i32* %a, i64 1
65+
; CHECK-NEXT: %2 = load i32, i32* %1, align 8
66+
; CHECK-NEXT: %3 = getelementptr inbounds i32, i32* %b, i64 1
67+
; CHECK-NEXT: store i32 %2, i32* %3, align 8
68+
; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
69+
; CHECK-NEXT: ret i32 %[[#]]
70+
%1 = getelementptr inbounds i32, i32* %a, i32 1
71+
%2 = load i32, i32* %1, align 8
72+
%3 = getelementptr inbounds i32, i32* %b, i32 1
73+
store i32 %2, i32* %3, align 8
74+
%4 = getelementptr inbounds i32, i32* %b, i32 1
75+
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
76+
%5 = load i32, i32* %4, align 8
77+
ret i32 %5
78+
}
79+
80+
define void @dse(i32* %p) {
81+
;; Check the first store is deleted.
82+
; CHECK-LABEL: @dse(
83+
; CHECK-NEXT: call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
84+
; CHECK-NEXT: store i32 0, i32* [[P:%.*]], align 4
85+
; CHECK-NEXT: ret void
86+
store i32 0, i32* %p
87+
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
88+
store i32 0, i32* %p
89+
ret void
90+
}
6291

6392
; Function Attrs: inaccessiblememonly nounwind willreturn
6493
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; RUN: opt < %s -memcpyopt -S | FileCheck %s
2+
3+
%struct.MV = type { i16, i16 }
4+
5+
define void @test(i32* nocapture %c) nounwind optsize {
6+
; All the stores in this example should be merged into a single memset.
7+
; CHECK-NOT: store i32 -1
8+
; CHECK: call void @llvm.memset.p0i8.i64
9+
store i32 -1, i32* %c, align 4
10+
%1 = getelementptr inbounds i32, i32* %c, i32 1
11+
store i32 -1, i32* %1, align 4
12+
%2 = getelementptr inbounds i32, i32* %c, i32 2
13+
store i32 -1, i32* %2, align 4
14+
call void @llvm.pseudoprobe(i64 5116412291814990879, i64 1, i32 0, i64 -1)
15+
%3 = getelementptr inbounds i32, i32* %c, i32 3
16+
store i32 -1, i32* %3, align 4
17+
%4 = getelementptr inbounds i32, i32* %c, i32 4
18+
store i32 -1, i32* %4, align 4
19+
ret void
20+
}
21+
22+
; Function Attrs: inaccessiblememonly nounwind willreturn
23+
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #0
24+
25+
attributes #0 = { inaccessiblememonly nounwind willreturn }

0 commit comments

Comments
 (0)