@@ -112,7 +112,7 @@ bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
112112Cost InstCostVisitor::estimateBasicBlocks (
113113 SmallVectorImpl<BasicBlock *> &WorkList) {
114114 Cost CodeSize = 0 ;
115- // Accumulate the instruction cost of each basic block weighted by frequency .
115+ // Accumulate the codesize savings of each basic block.
116116 while (!WorkList.empty ()) {
117117 BasicBlock *BB = WorkList.pop_back_val ();
118118
@@ -154,37 +154,55 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
154154 return KnownConstants.lookup (V);
155155}
156156
// Diff view of one function: '-' lines are the pre-change text
// (getBonusFromPendingPHIs, returning a Bonus) and '+' lines are the
// post-change text (getCodeSizeBonusFromPendingPHIs, returning a Cost).
//
// Drains PendingPHIs from the back until it is empty. For every popped PHI
// whose parent block still passes isBlockExecutable(), the result of
// getUserCodeSizeBonus(Phi) is added to a running total, which is returned.
// PendingPHIs is always empty when this function returns.
//
// NOTE(review): the accumulator is declared without an explicit initializer
// (`Cost CodeSize ;`), whereas estimateBasicBlocks writes `Cost CodeSize = 0`.
// This presumably relies on Cost default-constructing to zero -- confirm.
157- Bonus InstCostVisitor::getBonusFromPendingPHIs () {
158- Bonus B ;
157+ Cost InstCostVisitor::getCodeSizeBonusFromPendingPHIs () {
158+ Cost CodeSize ;
159159 while (!PendingPHIs.empty ()) {
160160 Instruction *Phi = PendingPHIs.pop_back_val ();
161161 // The pending PHIs could have been proven dead by now.
162162 if (isBlockExecutable (Phi->getParent ()))
163- B += getUserBonus (Phi);
163+ CodeSize += getUserCodeSizeBonus (Phi);
164164 }
165- return B ;
165+ return CodeSize ;
166166}
167167
// Diff view of one function: '-' lines are the pre-change text
// (getSpecializationBonus, returning a Bonus with CodeSize and Latency) and
// '+' lines are the post-change text (getCodeSizeBonus, returning a Cost).
//
// Parameters: A is the formal argument being replaced, C the constant it is
// replaced with. The function visits each user of A; users that are
// Instructions located in a block accepted by isBlockExecutable() contribute
// getUserCodeSizeBonus(UI, A, C) to the running total. Non-instruction users
// and users in non-executable blocks contribute nothing.
// Returns the accumulated total; the post-change debug output no longer
// prints a Latency figure.
//
// NOTE(review): as in the pending-PHI helper, `Cost CodeSize ;` has no
// explicit initializer; presumably Cost default-constructs to zero -- confirm.
168- // / Compute a bonus for replacing argument \p A with constant \p C.
169- Bonus InstCostVisitor::getSpecializationBonus (Argument *A, Constant *C) {
168+ // / Compute the codesize savings for replacing argument \p A with constant \p C.
169+ Cost InstCostVisitor::getCodeSizeBonus (Argument *A, Constant *C) {
170170 LLVM_DEBUG (dbgs () << " FnSpecialization: Analysing bonus for constant: "
171171 << C->getNameOrAsOperand () << " \n " );
172- Bonus B ;
172+ Cost CodeSize ;
173173 for (auto *U : A->users ())
174174 if (auto *UI = dyn_cast<Instruction>(U))
175175 if (isBlockExecutable (UI->getParent ()))
176- B += getUserBonus (UI, A, C);
176+ CodeSize += getUserCodeSizeBonus (UI, A, C);
177177
178178 LLVM_DEBUG (dbgs () << " FnSpecialization: Accumulated bonus {CodeSize = "
179- << B.CodeSize << " , Latency = " << B.Latency
180- << " } for argument " << *A << " \n " );
181- return B;
179+ << CodeSize << " } for argument " << *A << " \n " );
180+ return CodeSize;
181+ }
182+
// Newly added function (every line except the closing brace is a '+' line).
//
// Computes the latency part of the specialization bonus separately from the
// code-size part. It obtains block-frequency info via GetBFI(*F), then walks
// every entry of KnownConstants. Entries whose key is not an Instruction are
// skipped; each remaining instruction adds
//   Weight * TTI.getInstructionCost(I, TCK_Latency)
// to the total, where Weight is the instruction's block frequency divided by
// the entry frequency. Returns the accumulated Latency (starts at 0).
183+ Cost InstCostVisitor::getLatencyBonus () {
184+ auto &BFI = GetBFI (*F);
185+ Cost Latency = 0 ;
186+
// NOTE(review): `auto Pair` takes each map entry by value; a reference
// would avoid the per-iteration copy if the entry type is not trivial.
187+ for (auto Pair : KnownConstants) {
// Only keys that are Instructions carry a latency cost; other keys are
// ignored.
188+ Instruction *I = dyn_cast<Instruction>(Pair.first );
189+ if (!I)
190+ continue ;
191+
// Frequency ratio stored in a uint64_t. Presumably this is an integer
// division that truncates, so blocks colder than the entry block would get
// Weight == 0 -- the return type of getFrequency() is not visible here;
// confirm.
192+ uint64_t Weight = BFI.getBlockFreq (I->getParent ()).getFrequency () /
193+ BFI.getEntryFreq ().getFrequency ();
194+ Latency +=
195+ Weight * TTI.getInstructionCost (I, TargetTransformInfo::TCK_Latency);
196+ }
197+
198+ return Latency;
182199}
183200
// Diff view of one function: '-' lines are the pre-change text (getUserBonus,
// returning a Bonus) and '+' lines are the post-change text
// (getUserCodeSizeBonus, returning a Cost). Two stretches of the body are
// elided by the hunk headers below, so this summary covers only the visible
// statements.
//
// Visible behaviour of the post-change version:
//  * returns 0 immediately when KnownConstants already contains User;
//  * when Use is non-null, records {Use, C} in KnownConstants and caches the
//    resulting iterator in LastVisited (the null-Use alternative is elided);
//  * on the visible `else` path, C is replaced by visit(*User), and 0 is
//    returned when that yields no constant;
//  * adds the TCK_CodeSize cost of User to CodeSize (its declaration lies in
//    an elided region);
//  * recurses into every Instruction user of User, other than User itself,
//    whose block passes isBlockExecutable(), passing User as the new Use, and
//    returns the accumulated CodeSize.
// The per-user latency weighting present on the '-' lines is removed here.
//
// NOTE(review): getCodeSizeBonusFromPendingPHIs calls this with a single
// argument, so Use and C presumably have defaults in the declaration, which
// is not shown here -- confirm against the header.
184- Bonus InstCostVisitor::getUserBonus (Instruction *User, Value *Use, Constant *C) {
201+ Cost InstCostVisitor::getUserCodeSizeBonus (Instruction *User, Value *Use,
202+ Constant *C) {
185203 // We have already propagated a constant for this user.
186204 if (KnownConstants.contains (User))
187- return { 0 , 0 } ;
205+ return 0 ;
188206
189207 // Cache the iterator before visiting.
190208 LastVisited = Use ? KnownConstants.insert ({Use, C}).first
@@ -198,7 +216,7 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
198216 } else {
199217 C = visit (*User);
200218 if (!C)
201- return { 0 , 0 } ;
219+ return 0 ;
202220 }
203221
204222 // Even though it doesn't make sense to bind switch and branch instructions
@@ -208,23 +226,15 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
208226
209227 CodeSize += TTI.getInstructionCost (User, TargetTransformInfo::TCK_CodeSize);
210228
211- uint64_t Weight = BFI.getBlockFreq (User->getParent ()).getFrequency () /
212- BFI.getEntryFreq ().getFrequency ();
213-
214- Cost Latency = Weight *
215- TTI.getInstructionCost (User, TargetTransformInfo::TCK_Latency);
216-
217229 LLVM_DEBUG (dbgs () << " FnSpecialization: {CodeSize = " << CodeSize
218- << " , Latency = " << Latency << " } for user "
219- << *User << " \n " );
230+ << " } for user " << *User << " \n " );
220231
221- Bonus B (CodeSize, Latency);
222232 for (auto *U : User->users ())
223233 if (auto *UI = dyn_cast<Instruction>(U))
224234 if (UI != User && isBlockExecutable (UI->getParent ()))
225- B += getUserBonus (UI, User, C);
235+ CodeSize += getUserCodeSizeBonus (UI, User, C);
226236
227- return B ;
237+ return CodeSize ;
228238}
229239
230240Cost InstCostVisitor::estimateSwitchInst (SwitchInst &I) {
@@ -875,24 +885,23 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
875885 AllSpecs[Index].CallSites .push_back (&CS);
876886 } else {
877887 // Calculate the specialisation gain.
878- Bonus B ;
888+ Cost CodeSize ;
879889 unsigned Score = 0 ;
880890 InstCostVisitor Visitor = getInstCostVisitorFor (F);
881891 for (ArgInfo &A : S.Args ) {
882- B += Visitor.getSpecializationBonus (A.Formal , A.Actual );
892+ CodeSize += Visitor.getCodeSizeBonus (A.Formal , A.Actual );
883893 Score += getInliningBonus (A.Formal , A.Actual );
884894 }
885- B += Visitor.getBonusFromPendingPHIs ();
886-
895+ CodeSize += Visitor.getCodeSizeBonusFromPendingPHIs ();
887896
888897 LLVM_DEBUG (dbgs () << " FnSpecialization: Specialization bonus {CodeSize = "
889- << B.CodeSize << " , Latency = " << B.Latency
890- << " , Inlining = " << Score << " }\n " );
898+ << CodeSize << " , Inlining = " << Score << " }\n " );
891899
900+ Bonus B = {CodeSize, 0 };
892901 FunctionGrowth[F] += FuncSize - B.CodeSize ;
893902
894903 auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
895- unsigned FuncGrowth) -> bool {
904+ unsigned FuncGrowth, InstCostVisitor &V ) -> bool {
896905 // No check required.
897906 if (ForceSpecialization)
898907 return true ;
@@ -902,6 +911,14 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
902911 // Minimum codesize savings.
903912 if (B.CodeSize < MinCodeSizeSavings * FuncSize / 100 )
904913 return false ;
914+
915+ // Lazily compute the Latency, to avoid unnecessarily computing BFI.
916+ B += {0 , V.getLatencyBonus ()};
917+
918+ LLVM_DEBUG (
919+ dbgs () << " FnSpecialization: Specialization bonus {Latency = "
920+ << B.Latency << " }\n " );
921+
905922 // Minimum latency savings.
906923 if (B.Latency < MinLatencySavings * FuncSize / 100 )
907924 return false ;
@@ -912,7 +929,7 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
912929 };
913930
914931 // Discard unprofitable specialisations.
915- if (!IsProfitable (B, Score, FuncSize, FunctionGrowth[F]))
932+ if (!IsProfitable (B, Score, FuncSize, FunctionGrowth[F], Visitor ))
916933 continue ;
917934
918935 // Create a new specialisation entry.
0 commit comments