@@ -112,7 +112,7 @@ bool InstCostVisitor::canEliminateSuccessor(BasicBlock *BB, BasicBlock *Succ,
112112Cost InstCostVisitor::estimateBasicBlocks (
113113 SmallVectorImpl<BasicBlock *> &WorkList) {
114114 Cost CodeSize = 0 ;
115- // Accumulate the instruction cost of each basic block weighted by frequency .
115+ // Accumulate the codesize savings of each basic block.
116116 while (!WorkList.empty ()) {
117117 BasicBlock *BB = WorkList.pop_back_val ();
118118
@@ -154,37 +154,73 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
154154 return KnownConstants.lookup (V);
155155}
156156
// Drains PendingPHIs: every PHI is popped off the list, and for each one whose
// parent block is still executable the result of the per-user savings query is
// added to a running total, which is returned. PendingPHIs is empty on return.
157- Bonus InstCostVisitor::getBonusFromPendingPHIs () {
158- Bonus B ;
157+ Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs () {
158+ Cost CodeSize ;
159159 while (!PendingPHIs.empty ()) {
160160 Instruction *Phi = PendingPHIs.pop_back_val ();
161161 // The pending PHIs could have been proven dead by now, so only PHIs in
// blocks that are still executable contribute to the total.
162162 if (isBlockExecutable (Phi->getParent ()))
163- B += getUserBonus (Phi);
163+ CodeSize += getCodeSizeSavingsForUser (Phi);
164164 }
165- return B ;
165+ return CodeSize ;
166166}
167167
168- // / Compute a bonus for replacing argument \p A with constant \p C.
169- Bonus InstCostVisitor::getSpecializationBonus (Argument *A, Constant *C) {
168+ // / Compute the codesize savings for replacing argument \p A with constant \p C.
// The result is the sum of getCodeSizeSavingsForUser(UI, A, C) over every user
// of \p A that is an Instruction located in an executable block.
169+ Cost InstCostVisitor::getCodeSizeSavingsForArg (Argument *A, Constant *C) {
170170 LLVM_DEBUG (dbgs () << " FnSpecialization: Analysing bonus for constant: "
171171 << C->getNameOrAsOperand () << " \n " );
172- Bonus B ;
172+ Cost CodeSize ;
// Users that are not instructions, or that sit in non-executable blocks, are
// skipped and contribute nothing.
173173 for (auto *U : A->users ())
174174 if (auto *UI = dyn_cast<Instruction>(U))
175175 if (isBlockExecutable (UI->getParent ()))
176- B += getUserBonus (UI, A, C);
176+ CodeSize += getCodeSizeSavingsForUser (UI, A, C);
177177
// Debug-only trace of the accumulated total for this argument.
178178 LLVM_DEBUG (dbgs () << " FnSpecialization: Accumulated bonus {CodeSize = "
179- << B.CodeSize << " , Latency = " << B.Latency
180- << " } for argument " << *A << " \n " );
181- return B;
179+ << CodeSize << " } for argument " << *A << " \n " );
180+ return CodeSize;
182181}
183182
184- Bonus InstCostVisitor::getUserBonus (Instruction *User, Value *Use, Constant *C) {
183+ // / Compute the latency savings from replacing all arguments with constants for
184+ // / a specialization candidate. As this function computes the latency savings
185+ // / for all Instructions in KnownConstants at once, it should be called only
186+ // / after every instruction has been visited, i.e. after:
187+ // /
188+ // / * getCodeSizeSavingsForArg has been run for every constant argument of a
189+ // / specialization candidate
190+ // /
191+ // / * getCodeSizeSavingsFromPendingPHIs has been run
192+ // /
193+ // / to ensure that the latency savings are calculated for all Instructions we
194+ // / have visited and found to be constant.
195+ Cost InstCostVisitor::getLatencySavingsForKnownConstants () {
196+ auto &BFI = GetBFI (*F);
197+ Cost TotalLatency = 0 ;
198+
199+ for (auto Pair : KnownConstants) {
200+ Instruction *I = dyn_cast<Instruction>(Pair.first );
201+ if (!I)
202+ continue ;
203+
204+ uint64_t Weight = BFI.getBlockFreq (I->getParent ()).getFrequency () /
205+ BFI.getEntryFreq ().getFrequency ();
206+
207+ Cost Latency =
208+ Weight * TTI.getInstructionCost (I, TargetTransformInfo::TCK_Latency);
209+
210+ LLVM_DEBUG (dbgs () << " FnSpecialization: {Latency = " << Latency
211+ << " } for instruction " << *I << " \n " );
212+
213+ TotalLatency += Latency;
214+ }
215+
216+ return TotalLatency;
217+ }
218+
219+ Cost InstCostVisitor::getCodeSizeSavingsForUser (Instruction *User, Value *Use,
220+ Constant *C) {
185221 // We have already propagated a constant for this user.
186222 if (KnownConstants.contains (User))
187- return { 0 , 0 } ;
223+ return 0 ;
188224
189225 // Cache the iterator before visiting.
190226 LastVisited = Use ? KnownConstants.insert ({Use, C}).first
@@ -198,7 +234,7 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
198234 } else {
199235 C = visit (*User);
200236 if (!C)
201- return { 0 , 0 } ;
237+ return 0 ;
202238 }
203239
204240 // Even though it doesn't make sense to bind switch and branch instructions
@@ -208,23 +244,15 @@ Bonus InstCostVisitor::getUserBonus(Instruction *User, Value *Use, Constant *C)
208244
209245 CodeSize += TTI.getInstructionCost (User, TargetTransformInfo::TCK_CodeSize);
210246
211- uint64_t Weight = BFI.getBlockFreq (User->getParent ()).getFrequency () /
212- BFI.getEntryFreq ().getFrequency ();
213-
214- Cost Latency = Weight *
215- TTI.getInstructionCost (User, TargetTransformInfo::TCK_Latency);
216-
217247 LLVM_DEBUG (dbgs () << " FnSpecialization: {CodeSize = " << CodeSize
218- << " , Latency = " << Latency << " } for user "
219- << *User << " \n " );
248+ << " } for user " << *User << " \n " );
220249
221- Bonus B (CodeSize, Latency);
222250 for (auto *U : User->users ())
223251 if (auto *UI = dyn_cast<Instruction>(U))
224252 if (UI != User && isBlockExecutable (UI->getParent ()))
225- B += getUserBonus (UI, User, C);
253+ CodeSize += getCodeSizeSavingsForUser (UI, User, C);
226254
227- return B ;
255+ return CodeSize ;
228256}
229257
230258Cost InstCostVisitor::estimateSwitchInst (SwitchInst &I) {
@@ -809,6 +837,18 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
809837 return Clone;
810838}
811839
840+ // / Get the unsigned Value of given Cost object. Assumes the Cost is always
841+ // / non-negative, which is true for both TCK_CodeSize and TCK_Latency, and
842+ // / always Valid.
843+ static unsigned getCostValue (const Cost &C) {
844+ int64_t Value = *C.getValue ();
845+
846+ assert (Value >= 0 && " CodeSize and Latency cannot be negative" );
847+ // It is safe to down cast since we know the arguments cannot be negative and
848+ // Cost is of type int64_t.
849+ return static_cast <unsigned >(Value);
850+ }
851+
812852bool FunctionSpecializer::findSpecializations (Function *F, unsigned FuncSize,
813853 SmallVectorImpl<Spec> &AllSpecs,
814854 SpecMap &SM) {
@@ -875,48 +915,67 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
875915 AllSpecs[Index].CallSites .push_back (&CS);
876916 } else {
877917 // Calculate the specialisation gain.
878- Bonus B ;
918+ Cost CodeSize ;
879919 unsigned Score = 0 ;
880920 InstCostVisitor Visitor = getInstCostVisitorFor (F);
881921 for (ArgInfo &A : S.Args ) {
882- B += Visitor.getSpecializationBonus (A.Formal , A.Actual );
922+ CodeSize += Visitor.getCodeSizeSavingsForArg (A.Formal , A.Actual );
883923 Score += getInliningBonus (A.Formal , A.Actual );
884924 }
885- B += Visitor.getBonusFromPendingPHIs ();
925+ CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs ();
886926
887-
888- LLVM_DEBUG (dbgs () << " FnSpecialization: Specialization bonus {CodeSize = "
889- << B.CodeSize << " , Latency = " << B.Latency
890- << " , Inlining = " << Score << " }\n " );
891-
892- FunctionGrowth[F] += FuncSize - B.CodeSize ;
893-
894- auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
895- unsigned FuncGrowth) -> bool {
927+ auto IsProfitable = [&]() -> bool {
896928 // No check required.
897929 if (ForceSpecialization)
898930 return true ;
931+
932+ unsigned CodeSizeSavings = getCostValue (CodeSize);
933+ // TODO: We should only accumulate codesize increase of specializations
934+ // that are actually created.
935+ FunctionGrowth[F] += FuncSize - CodeSizeSavings;
936+
937+ LLVM_DEBUG (
938+ dbgs () << " FnSpecialization: Specialization bonus {Inlining = "
939+ << Score << " (" << (Score * 100 / FuncSize) << " %)}\n " );
940+
899941 // Minimum inlining bonus.
900942 if (Score > MinInliningBonus * FuncSize / 100 )
901943 return true ;
944+
945+ LLVM_DEBUG (
946+ dbgs () << " FnSpecialization: Specialization bonus {CodeSize = "
947+ << CodeSizeSavings << " ("
948+ << (CodeSizeSavings * 100 / FuncSize) << " %)}\n " );
949+
902950 // Minimum codesize savings.
903- if (B. CodeSize < MinCodeSizeSavings * FuncSize / 100 )
951+ if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100 )
904952 return false ;
953+
954+ // Lazily compute the Latency, to avoid unnecessarily computing BFI.
955+ unsigned LatencySavings =
956+ getCostValue (Visitor.getLatencySavingsForKnownConstants ());
957+
958+ LLVM_DEBUG (
959+ dbgs () << " FnSpecialization: Specialization bonus {Latency = "
960+ << LatencySavings << " ("
961+ << (LatencySavings * 100 / FuncSize) << " %)}\n " );
962+
905963 // Minimum latency savings.
906- if (B. Latency < MinLatencySavings * FuncSize / 100 )
964+ if (LatencySavings < MinLatencySavings * FuncSize / 100 )
907965 return false ;
908966 // Maximum codesize growth.
909- if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
967+ if (FunctionGrowth[F] / FuncSize > MaxCodeSizeGrowth)
910968 return false ;
969+
970+ Score += std::max (CodeSizeSavings, LatencySavings);
911971 return true ;
912972 };
913973
914974 // Discard unprofitable specialisations.
915- if (!IsProfitable (B, Score, FuncSize, FunctionGrowth[F] ))
975+ if (!IsProfitable ())
916976 continue ;
917977
918978 // Create a new specialisation entry.
919- Score += std::max (B.CodeSize , B.Latency );
920979 auto &Spec = AllSpecs.emplace_back (F, S, Score);
921980 if (CS.getFunction () != F)
922981 Spec.CallSites .push_back (&CS);
0 commit comments