Skip to content

Commit 60306d5

Browse files
committed
[WIP] Extend LVI to cache ranges per BB predecessor.
1 parent 59e43fe commit 60306d5

File tree

2 files changed

+182
-1
lines changed

2 files changed

+182
-1
lines changed

llvm/lib/Analysis/LazyValueInfo.cpp

Lines changed: 113 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,10 @@ namespace {
103103

104104
namespace {
105105
using NonNullPointerSet = SmallDenseSet<AssertingVH<Value>, 2>;
106+
using BBLatticeElementMap =
107+
SmallDenseMap<PoisoningVH<BasicBlock>, ValueLatticeElement, 4>;
108+
using PredecessorValueLatticeMap =
109+
SmallDenseMap<AssertingVH<Value>, BBLatticeElementMap, 4>;
106110

107111
/// This is the cache kept by LazyValueInfo which
108112
/// maintains information about queries across the clients' queries.
@@ -117,6 +121,10 @@ class LazyValueInfoCache {
117121
// std::nullopt indicates that the nonnull pointers for this basic block
118122
// have not been computed yet.
119123
std::optional<NonNullPointerSet> NonNullPointers;
124+
// This is an extension of the above LatticeElements, caching, for each
125+
// Value, a ValueLatticeElement, for each predecessor of the BB tracked by
126+
// this entry.
127+
PredecessorValueLatticeMap PredecessorLatticeElements;
120128
};
121129

122130
/// Cached information per basic block.
@@ -161,6 +169,28 @@ class LazyValueInfoCache {
161169
addValueHandle(Val);
162170
}
163171

172+
void insertPredecessorResults(Value *Val, BasicBlock *BB,
173+
BBLatticeElementMap &PredLatticeElements) {
174+
BlockCacheEntry *Entry = getOrCreateBlockEntry(BB);
175+
176+
Entry->PredecessorLatticeElements.insert({Val, PredLatticeElements});
177+
178+
addValueHandle(Val);
179+
}
180+
181+
/// Look up the cached per-predecessor lattice elements for \p V in \p BB.
///
/// Returns std::nullopt when either \p BB has no cache entry at all, or the
/// entry has no per-predecessor results recorded for \p V.
///
/// NOTE(review): this returns the map by value, so every successful lookup
/// copies all cached ValueLatticeElements for V — consider returning a
/// pointer to the cached map if this shows up in profiles.
std::optional<BBLatticeElementMap>
getCachedPredecessorInfo(Value *V, BasicBlock *BB) const {
  const BlockCacheEntry *Entry = getBlockEntry(BB);
  if (!Entry)
    return std::nullopt;

  // find_as avoids constructing an AssertingVH key just for the lookup.
  auto LatticeIt = Entry->PredecessorLatticeElements.find_as(V);
  if (LatticeIt == Entry->PredecessorLatticeElements.end())
    return std::nullopt;

  return LatticeIt->second;
}
164194
std::optional<ValueLatticeElement> getCachedValueInfo(Value *V,
165195
BasicBlock *BB) const {
166196
const BlockCacheEntry *Entry = getBlockEntry(BB);
@@ -216,6 +246,7 @@ void LazyValueInfoCache::eraseValue(Value *V) {
216246
Pair.second->OverDefined.erase(V);
217247
if (Pair.second->NonNullPointers)
218248
Pair.second->NonNullPointers->erase(V);
249+
Pair.second->PredecessorLatticeElements.erase(V);
219250
}
220251

221252
auto HandleIt = ValueHandles.find_as(V);
@@ -230,6 +261,10 @@ void LVIValueHandle::deleted() {
230261
}
231262

232263
/// Remove \p BB from the cache.
///
/// Cached per-predecessor maps in *other* blocks are keyed by basic blocks
/// (PoisoningVH), so any of them may reference the block being removed;
/// conservatively drop every block's predecessor map before erasing BB's own
/// entry.
void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
  for (auto &[Block, Entry] : BlockCache)
    Entry->PredecessorLatticeElements.clear();
  BlockCache.erase(BB);
}
235270

@@ -691,6 +726,7 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) {
691726
// find a path to function entry. TODO: We should consider explicitly
692727
// canonicalizing to make this true rather than relying on this happy
693728
// accident.
729+
BBLatticeElementMap PredLatticeElements;
694730
for (BasicBlock *Pred : predecessors(BB)) {
695731
// Skip self loops.
696732
if (Pred == BB)
@@ -710,8 +746,12 @@ LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) {
710746
<< Pred->getName() << "' (non local).\n");
711747
return Result;
712748
}
749+
750+
PredLatticeElements.insert({Pred, *EdgeResult});
713751
}
714752

753+
TheCache.insertPredecessorResults(Val, BB, PredLatticeElements);
754+
715755
// Return the merged value, which is more precise than 'overdefined'.
716756
assert(!Result.isOverdefined());
717757
return Result;
@@ -724,6 +764,7 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
724764
// Loop over all of our predecessors, merging what we know from them into
725765
// result. See the comment about the chosen traversal order in
726766
// solveBlockValueNonLocal; the same reasoning applies here.
767+
BBLatticeElementMap PredLatticeElements;
727768
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
728769
BasicBlock *PhiBB = PN->getIncomingBlock(i);
729770
Value *PhiVal = PN->getIncomingValue(i);
@@ -746,8 +787,12 @@ LazyValueInfoImpl::solveBlockValuePHINode(PHINode *PN, BasicBlock *BB) {
746787

747788
return Result;
748789
}
790+
791+
PredLatticeElements.insert({PhiBB, *EdgeResult});
749792
}
750793

794+
TheCache.insertPredecessorResults(PN, BB, PredLatticeElements);
795+
751796
// Return the merged value, which is more precise than 'overdefined'.
752797
assert(!Result.isOverdefined() && "Possible PHI in entry block?");
753798
return Result;
@@ -1002,7 +1047,74 @@ LazyValueInfoImpl::solveBlockValueBinaryOpImpl(
10021047

10031048
const ConstantRange &LHSRange = *LHSRes;
10041049
const ConstantRange &RHSRange = *RHSRes;
1005-
return ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
1050+
1051+
std::optional<ValueLatticeElement> MergedResult =
1052+
ValueLatticeElement::getRange(OpFn(LHSRange, RHSRange));
1053+
1054+
std::optional<BBLatticeElementMap> PredLHS =
1055+
TheCache.getCachedPredecessorInfo(LHS, BB);
1056+
if (!PredLHS)
1057+
return MergedResult;
1058+
std::optional<BBLatticeElementMap> PredRHS =
1059+
TheCache.getCachedPredecessorInfo(RHS, BB);
1060+
if (!PredRHS)
1061+
return MergedResult;
1062+
1063+
const BBLatticeElementMap &LHSPredMap = *PredLHS;
1064+
const BBLatticeElementMap &RHSPredMap = *PredRHS;
1065+
1066+
BBLatticeElementMap PredLatticeElements;
1067+
ValueLatticeElement OverallPredResult;
1068+
for (auto *Pred : predecessors(BB)) {
1069+
auto LHSIt = LHSPredMap.find_as(Pred);
1070+
if (LHSIt == LHSPredMap.end())
1071+
return MergedResult;
1072+
const ValueLatticeElement &LHSFromPred = LHSIt->second;
1073+
std::optional<ConstantRange> LHSFromPredRes =
1074+
LHSFromPred.asConstantRange(LHS->getType());
1075+
if (!LHSFromPredRes)
1076+
return MergedResult;
1077+
1078+
auto RHSIt = RHSPredMap.find_as(Pred);
1079+
if (RHSIt == RHSPredMap.end())
1080+
return MergedResult;
1081+
const ValueLatticeElement &RHSFromPred = RHSIt->second;
1082+
std::optional<ConstantRange> RHSFromPredRes =
1083+
RHSFromPred.asConstantRange(RHS->getType());
1084+
if (!RHSFromPredRes)
1085+
return MergedResult;
1086+
1087+
const ConstantRange &LHSFromPredRange = *LHSFromPredRes;
1088+
const ConstantRange &RHSFromPredRange = *RHSFromPredRes;
1089+
std::optional<ValueLatticeElement> PredResult =
1090+
ValueLatticeElement::getRange(OpFn(LHSFromPredRange, RHSFromPredRange));
1091+
if (!PredResult)
1092+
return MergedResult;
1093+
if (PredResult->isOverdefined()) {
1094+
LLVM_DEBUG(
1095+
dbgs() << " pred BB '" << Pred->getName() << "' for BB '"
1096+
<< BB->getName()
1097+
<< "' overdefined. Discarding all predecessor intervals.\n");
1098+
return MergedResult;
1099+
}
1100+
PredLatticeElements.insert({Pred, *PredResult});
1101+
OverallPredResult.mergeIn(*PredResult);
1102+
}
1103+
1104+
// If this point is reached, all predecessors of both LHS and RHS have
// previously computed constant ranges, so we can cache the result and use
// OverallPredResult.
1107+
TheCache.insertPredecessorResults(I, BB, PredLatticeElements);
1108+
1109+
LLVM_DEBUG(dbgs() << " Using predecessor intervals, evaluated " << *I
1110+
<< " to: " << OverallPredResult << ".\n");
1111+
1112+
if (!MergedResult)
1113+
return OverallPredResult;
1114+
1115+
LLVM_DEBUG(dbgs() << " Intersecting intervals for " << *I << ": "
1116+
<< OverallPredResult << " and " << MergedResult << ".\n");
1117+
return MergedResult->intersect(OverallPredResult);
10061118
}
10071119

10081120
std::optional<ValueLatticeElement>
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -passes="correlated-propagation" -S 2>&1 | FileCheck %s

; Exercises LVI's per-predecessor range caching under correlated-propagation:
; the bounds check %icmp6 (%lshr < 4338) is expected to fold to "br i1 true"
; (see BB4's terminator CHECK line), proving the ubsantrap branch dead.

@global = external local_unnamed_addr global [4338 x i32], align 16

define dso_local noundef zeroext i1 @bar(i64 noundef %arg, ptr noundef writeonly captures(none) %arg1) local_unnamed_addr {
; CHECK-LABEL: define dso_local noundef zeroext i1 @bar(
; CHECK-SAME: i64 noundef [[ARG:%.*]], ptr noundef writeonly captures(none) [[ARG1:%.*]]) local_unnamed_addr {
; CHECK-NEXT: [[BB:.*]]:
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i64 [[ARG]], 1025
; CHECK-NEXT: br i1 [[ICMP]], label %[[BB4:.*]], label %[[BB2:.*]]
; CHECK: [[BB2]]:
; CHECK-NEXT: [[ICMP3:%.*]] = icmp ult i64 [[ARG]], 262145
; CHECK-NEXT: br i1 [[ICMP3]], label %[[BB4]], label %[[BB9:.*]]
; CHECK: [[BB4]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ 7, %[[BB]] ], [ 15487, %[[BB2]] ]
; CHECK-NEXT: [[PHI5:%.*]] = phi i64 [ 3, %[[BB]] ], [ 7, %[[BB2]] ]
; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i64 [[PHI]], [[ARG]]
; CHECK-NEXT: [[LSHR:%.*]] = lshr i64 [[ADD]], [[PHI5]]
; CHECK-NEXT: br i1 true, label %[[BB8:.*]], label %[[BB7:.*]]
; CHECK: [[BB7]]:
; CHECK-NEXT: tail call void @llvm.ubsantrap(i8 18)
; CHECK-NEXT: unreachable
; CHECK: [[BB8]]:
; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds nuw [4338 x i32], ptr @global, i64 0, i64 [[LSHR]]
; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[GETELEMENTPTR]], align 4
; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[LOAD]] to i64
; CHECK-NEXT: store i64 [[SEXT]], ptr [[ARG1]], align 8
; CHECK-NEXT: br label %[[BB9]]
; CHECK: [[BB9]]:
; CHECK-NEXT: [[PHI10:%.*]] = phi i1 [ true, %[[BB8]] ], [ false, %[[BB2]] ]
; CHECK-NEXT: ret i1 [[PHI10]]
;
bb:
  %icmp = icmp ult i64 %arg, 1025
  br i1 %icmp, label %bb4, label %bb2

bb2: ; preds = %bb
  %icmp3 = icmp ult i64 %arg, 262145
  br i1 %icmp3, label %bb4, label %bb9

bb4: ; preds = %bb2, %bb
  ; %arg is < 1025 via %bb and [1025, 262145) via %bb2, so the shifted sum
  ; stays below 4338 on every incoming edge — this is what the pass must prove.
  %phi = phi i64 [ 7, %bb ], [ 15487, %bb2 ]
  %phi5 = phi i64 [ 3, %bb ], [ 7, %bb2 ]
  %add = add nuw nsw i64 %phi, %arg
  %lshr = lshr i64 %add, %phi5
  %icmp6 = icmp samesign ult i64 %lshr, 4338
  br i1 %icmp6, label %bb8, label %bb7

bb7: ; preds = %bb4
  tail call void @llvm.ubsantrap(i8 18)
  unreachable

bb8: ; preds = %bb4
  %getelementptr = getelementptr inbounds nuw [4338 x i32], ptr @global, i64 0, i64 %lshr
  %load = load i32, ptr %getelementptr, align 4
  %sext = sext i32 %load to i64
  store i64 %sext, ptr %arg1, align 8
  br label %bb9

bb9: ; preds = %bb8, %bb2
  %phi10 = phi i1 [ true, %bb8 ], [ false, %bb2 ]
  ret i1 %phi10
}

; Function Attrs: cold noreturn nounwind
declare void @llvm.ubsantrap(i8 immarg) #0

attributes #0 = { cold noreturn nounwind }

0 commit comments

Comments
 (0)