Skip to content

Commit d382315

Browse files
committed
[FuncSpec] Only compute Latency bonus when necessary
Only compute the Latency component of a specialisation's Bonus when necessary, to avoid unnecessarily computing the Block Frequency Information for a Function.
1 parent a18dd29 commit d382315

File tree

4 files changed

+182
-121
lines changed

4 files changed

+182
-121
lines changed

llvm/include/llvm/Transforms/IPO/FunctionSpecialization.h

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,9 @@ struct Bonus {
173173
};
174174

175175
class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
176+
std::function<BlockFrequencyInfo &(Function &)> GetBFI;
177+
Function *F;
176178
const DataLayout &DL;
177-
BlockFrequencyInfo &BFI;
178179
TargetTransformInfo &TTI;
179180
SCCPSolver &Solver;
180181

@@ -192,26 +193,29 @@ class InstCostVisitor : public InstVisitor<InstCostVisitor, Constant *> {
192193
ConstMap::iterator LastVisited;
193194

194195
public:
195-
InstCostVisitor(const DataLayout &DL, BlockFrequencyInfo &BFI,
196-
TargetTransformInfo &TTI, SCCPSolver &Solver)
197-
: DL(DL), BFI(BFI), TTI(TTI), Solver(Solver) {}
196+
InstCostVisitor(std::function<BlockFrequencyInfo &(Function &)> GetBFI,
197+
Function *F, const DataLayout &DL, TargetTransformInfo &TTI,
198+
SCCPSolver &Solver)
199+
: GetBFI(GetBFI), F(F), DL(DL), TTI(TTI), Solver(Solver) {}
198200

199201
bool isBlockExecutable(BasicBlock *BB) {
200202
return Solver.isBlockExecutable(BB) && !DeadBlocks.contains(BB);
201203
}
202204

203-
Bonus getSpecializationBonus(Argument *A, Constant *C);
205+
Cost getCodeSizeBonus(Argument *A, Constant *C);
206+
207+
Cost getCodeSizeBonusFromPendingPHIs();
204208

205-
Bonus getBonusFromPendingPHIs();
209+
Cost getLatencyBonus();
206210

207211
private:
208212
friend class InstVisitor<InstCostVisitor, Constant *>;
209213

210214
static bool canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
211215
DenseSet<BasicBlock *> &DeadBlocks);
212216

213-
Bonus getUserBonus(Instruction *User, Value *Use = nullptr,
214-
Constant *C = nullptr);
217+
Cost getUserCodeSizeBonus(Instruction *User, Value *Use = nullptr,
218+
Constant *C = nullptr);
215219

216220
Cost estimateBasicBlocks(SmallVectorImpl<BasicBlock *> &WorkList);
217221
Cost estimateSwitchInst(SwitchInst &I);
@@ -283,9 +287,8 @@ class FunctionSpecializer {
283287
bool run();
284288

285289
InstCostVisitor getInstCostVisitorFor(Function *F) {
286-
auto &BFI = GetBFI(*F);
287290
auto &TTI = GetTTI(*F);
288-
return InstCostVisitor(M.getDataLayout(), BFI, TTI, Solver);
291+
return InstCostVisitor(GetBFI, F, M.getDataLayout(), TTI, Solver);
289292
}
290293

291294
private:

llvm/lib/Transforms/IPO/FunctionSpecialization.cpp

Lines changed: 51 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
112112
Cost InstCostVisitor::estimateBasicBlocks(
113113
SmallVectorImpl<BasicBlock *> &WorkList) {
114114
Cost CodeSize = 0;
115-
// Accumulate the instruction cost of each basic block weighted by frequency.
115+
// Accumulate the codesize savings of each basic block.
116116
while (!WorkList.empty()) {
117117
BasicBlock *BB = WorkList.pop_back_val();
118118

@@ -154,37 +154,55 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
154154
return KnownConstants.lookup(V);
155155
}
156156

157-
Bonus InstCostVisitor::getBonusFromPendingPHIs() {
158-
Bonus B;
157+
Cost InstCostVisitor::getCodeSizeBonusFromPendingPHIs() {
158+
Cost CodeSize;
159159
while (!PendingPHIs.empty()) {
160160
Instruction *Phi = PendingPHIs.pop_back_val();
161161
// The pending PHIs could have been proven dead by now.
162162
if (isBlockExecutable(Phi->getParent()))
163-
B += getUserBonus(Phi);
163+
CodeSize += getUserCodeSizeBonus(Phi);
164164
}
165-
return B;
165+
return CodeSize;
166166
}
167167

168-
/// Compute a bonus for replacing argument \p A with constant \p C.
169-
Bonus InstCostVisitor::getSpecializationBonus(Argument *A, Constant *C) {
168+
/// Compute the codesize savings for replacing argument \p A with constant \p C.
169+
Cost InstCostVisitor::getCodeSizeBonus(Argument *A, Constant *C) {
170170
LLVM_DEBUG(dbgs() << "FnSpecialization: Analysing bonus for constant: "
171171
<< C->getNameOrAsOperand() << "\n");
172-
Bonus B;
172+
Cost CodeSize;
173173
for (auto *U : A->users())
174174
if (auto *UI = dyn_cast<Instruction>(U))
175175
if (isBlockExecutable(UI->getParent()))
176-
B += getUserBonus(UI, A, C);
176+
CodeSize += getUserCodeSizeBonus(UI, A, C);
177177

178178
LLVM_DEBUG(dbgs() << "FnSpecialization: Accumulated bonus {CodeSize = "
179-
<< B.CodeSize << ", Latency = " << B.Latency
180-
<< "} for argument " << *A << "\n");
181-
return B;
179+
<< CodeSize << "} for argument " << *A << "\n");
180+
return CodeSize;
181+
}
182+
183+
Cost InstCostVisitor::getLatencyBonus() {
184+
auto &BFI = GetBFI(*F);
185+
Cost Latency = 0;
186+
187+
for (auto Pair : KnownConstants) {
188+
Instruction *I = dyn_cast<Instruction>(Pair.first);
189+
if (!I)
190+
continue;
191+
192+
uint64_t Weight = BFI.getBlockFreq(I->getParent()).getFrequency() /
193+
BFI.getEntryFreq().getFrequency();
194+
Latency +=
195+
Weight * TTI.getInstructionCost(I, TargetTransformInfo::TCK_Latency);
196+
}
197+
198+
return Latency;
182199
}
183200

184-
Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C) {
201+
Cost InstCostVisitor::getUserCodeSizeBonus(Instruction *User, Value *Use,
202+
Constant *C) {
185203
// We have already propagated a constant for this user.
186204
if (KnownConstants.contains(User))
187-
return {0, 0};
205+
return 0;
188206

189207
// Cache the iterator before visiting.
190208
LastVisited = Use ? KnownConstants.insert({Use, C}).first
@@ -198,7 +216,7 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
198216
} else {
199217
C = visit(*User);
200218
if (!C)
201-
return {0, 0};
219+
return 0;
202220
}
203221

204222
// Even though it doesn't make sense to bind switch and branch instructions
@@ -208,23 +226,15 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
208226

209227
CodeSize += TTI.getInstructionCost(User, TargetTransformInfo::TCK_CodeSize);
210228

211-
uint64_t Weight = BFI.getBlockFreq(User->getParent()).getFrequency() /
212-
BFI.getEntryFreq().getFrequency();
213-
214-
Cost Latency = Weight *
215-
TTI.getInstructionCost(User, TargetTransformInfo::TCK_Latency);
216-
217229
LLVM_DEBUG(dbgs() << "FnSpecialization: {CodeSize = " << CodeSize
218-
<< ", Latency = " << Latency << "} for user "
219-
<< *User << "\n");
230+
<< "} for user " << *User << "\n");
220231

221-
Bonus B(CodeSize, Latency);
222232
for (auto *U : User->users())
223233
if (auto *UI = dyn_cast<Instruction>(U))
224234
if (UI != User && isBlockExecutable(UI->getParent()))
225-
B += getUserBonus(UI, User, C);
235+
CodeSize += getUserCodeSizeBonus(UI, User, C);
226236

227-
return B;
237+
return CodeSize;
228238
}
229239

230240
Cost InstCostVisitor::estimateSwitchInst(SwitchInst &I) {
@@ -875,24 +885,23 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
875885
AllSpecs[Index].CallSites.push_back(&CS);
876886
} else {
877887
// Calculate the specialisation gain.
878-
Bonus B;
888+
Cost CodeSize;
879889
unsigned Score = 0;
880890
InstCostVisitor Visitor = getInstCostVisitorFor(F);
881891
for (ArgInfo &A : S.Args) {
882-
B += Visitor.getSpecializationBonus(A.Formal, A.Actual);
892+
CodeSize += Visitor.getCodeSizeBonus(A.Formal, A.Actual);
883893
Score += getInliningBonus(A.Formal, A.Actual);
884894
}
885-
B += Visitor.getBonusFromPendingPHIs();
886-
895+
CodeSize += Visitor.getCodeSizeBonusFromPendingPHIs();
887896

888897
LLVM_DEBUG(dbgs() << "FnSpecialization: Specialization bonus {CodeSize = "
889-
<< B.CodeSize << ", Latency = " << B.Latency
890-
<< ", Inlining = " << Score << "}\n");
898+
<< CodeSize << ", Inlining = " << Score << "}\n");
891899

900+
Bonus B = {CodeSize, 0};
892901
FunctionGrowth[F] += FuncSize - B.CodeSize;
893902

894903
auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
895-
unsigned FuncGrowth) -> bool {
904+
unsigned FuncGrowth, InstCostVisitor &V) -> bool {
896905
// No check required.
897906
if (ForceSpecialization)
898907
return true;
@@ -902,6 +911,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
902911
// Minimum codesize savings.
903912
if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100)
904913
return false;
914+
915+
// Lazily compute the Latency, to avoid unnecessarily computing BFI.
916+
B += {0, V.getLatencyBonus()};
917+
918+
LLVM_DEBUG(
919+
dbgs() << "FnSpecialization: Specialization bonus {Latency = "
920+
<< B.Latency << "}\n");
921+
905922
// Minimum latency savings.
906923
if (B.Latency < MinLatencySavings * FuncSize / 100)
907924
return false;
@@ -912,7 +929,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
912929
};
913930

914931
// Discard unprofitable specialisations.
915-
if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F]))
932+
if (!IsProfitable(B, Score, FuncSize, FunctionGrowth[F], Visitor))
916933
continue;
917934

918935
// Create a new specialisation entry.

llvm/test/Transforms/SCCP/ipsccp-preserve-pdt.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,25 @@
44

55
; This test case is trying to validate that the postdomtree is preserved
66
; correctly by the ipsccp pass. A tricky bug was introduced in commit
7-
; 1b1232047e83b69561 when PDT would be feched using getCachedAnalysis in order
7+
; 1b1232047e83b69561 when PDT would be fetched using getCachedAnalysis in order
88
; to set up a DomTreeUpdater (to update the PDT during transformation in order
99
; to preserve the analysis). But given that commit the PDT could end up being
1010
; required and calculated via BlockFrequency analysis. So the problem was that
1111
; when setting up the DomTreeUpdater we used a nullptr in case PDT wasn't
12-
; cached at the begininng of IPSCCP, to indicate that no updates where needed
12+
; cached at the beginning of IPSCCP, to indicate that no updates were needed
1313
; for PDT. But then the PDT was calculated, given the input IR, and preserved
1414
; using the non-updated state (as the DTU wasn't configured for updating the
1515
; PDT).
1616

1717
; CHECK-NOT: <badref>
1818
; CHECK: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries.
19-
; CHECK-NEXT: [1] <<exit node>> {4294967295,4294967295} [0]
20-
; CHECK-NEXT: [2] %for.cond34 {4294967295,4294967295} [1]
21-
; CHECK-NEXT: [3] %for.cond16 {4294967295,4294967295} [2]
22-
; CHECK-NEXT: [2] %for.body {4294967295,4294967295} [1]
23-
; CHECK-NEXT: [2] %if.end4 {4294967295,4294967295} [1]
24-
; CHECK-NEXT: [3] %entry {4294967295,4294967295} [2]
25-
; CHECK-NEXT: Roots: %for.cond34 %for.body
19+
; CHECK-NEXT: [1] <<exit node>> {4294967295,4294967295} [0]
20+
; CHECK-NEXT: [2] %for.body {4294967295,4294967295} [1]
21+
; CHECK-NEXT: [2] %if.end4 {4294967295,4294967295} [1]
22+
; CHECK-NEXT: [3] %entry {4294967295,4294967295} [2]
23+
; CHECK-NEXT: [2] %for.cond34 {4294967295,4294967295} [1]
24+
; CHECK-NEXT: [3] %for.cond16 {4294967295,4294967295} [2]
25+
; CHECK-NEXT: Roots: %for.body %for.cond34
2626
; CHECK-NEXT: PostDominatorTree for function: bar
2727
; CHECK-NOT: <badref>
2828

0 commit comments

Comments
 (0)