@@ -154,33 +154,45 @@ static Constant *findConstantFor(Value *V, ConstMap &KnownConstants) {
154154 return KnownConstants.lookup (V);
155155}
156156
157- Cost InstCostVisitor::getCodeSizeBonusFromPendingPHIs () {
157+ Cost InstCostVisitor::getCodeSizeSavingsFromPendingPHIs () {
158158 Cost CodeSize;
159159 while (!PendingPHIs.empty ()) {
160160 Instruction *Phi = PendingPHIs.pop_back_val ();
161161 // The pending PHIs could have been proven dead by now.
162162 if (isBlockExecutable (Phi->getParent ()))
163- CodeSize += getUserCodeSizeBonus (Phi);
163+ CodeSize += getCodeSizeSavingsForUser (Phi);
164164 }
165165 return CodeSize;
166166}
167167
168168// / Compute the codesize savings for replacing argument \p A with constant \p C.
169- Cost InstCostVisitor::getCodeSizeBonus (Argument *A, Constant *C) {
169+ Cost InstCostVisitor::getCodeSizeSavingsForArg (Argument *A, Constant *C) {
170170 LLVM_DEBUG (dbgs () << " FnSpecialization: Analysing bonus for constant: "
171171 << C->getNameOrAsOperand () << " \n " );
172172 Cost CodeSize;
173173 for (auto *U : A->users ())
174174 if (auto *UI = dyn_cast<Instruction>(U))
175175 if (isBlockExecutable (UI->getParent ()))
176- CodeSize += getUserCodeSizeBonus (UI, A, C);
176+ CodeSize += getCodeSizeSavingsForUser (UI, A, C);
177177
178178 LLVM_DEBUG (dbgs () << " FnSpecialization: Accumulated bonus {CodeSize = "
179179 << CodeSize << " } for argument " << *A << " \n " );
180180 return CodeSize;
181181}
182182
183- Cost InstCostVisitor::getLatencyBonus () {
183+ // / Compute the latency savings from replacing all arguments with constants for
184+ // / a specialization candidate. As this function computes the latency savings
185+ // / for all Instructions in KnownConstants at once, it should be called only
186+ // / after every instruction has been visited, i.e. after:
187+ // /
188+ // / * getCodeSizeBonus has been run for every constant argument of a
189+ // / specialization candidate
190+ // /
191+ // / * getCodeSizeBonusFromPendingPHIs has been run
192+ // /
193+ // / to ensure that the latency savings are calculated for all Instructions we
194+ // / have visited and found to be constant.
195+ Cost InstCostVisitor::getLatencySavingsForKnownConstants () {
184196 auto &BFI = GetBFI (*F);
185197 Cost Latency = 0 ;
186198
@@ -198,8 +210,8 @@ Cost InstCostVisitor::getLatencyBonus() {
198210 return Latency;
199211}
200212
201- Cost InstCostVisitor::getUserCodeSizeBonus (Instruction *User, Value *Use,
202- Constant *C) {
213+ Cost InstCostVisitor::getCodeSizeSavingsForUser (Instruction *User, Value *Use,
214+ Constant *C) {
203215 // We have already propagated a constant for this user.
204216 if (KnownConstants.contains (User))
205217 return 0 ;
@@ -232,7 +244,7 @@ Cost InstCostVisitor::getUserCodeSizeBonus(Instruction *User, Value *Use,
232244 for (auto *U : User->users ())
233245 if (auto *UI = dyn_cast<Instruction>(U))
234246 if (UI != User && isBlockExecutable (UI->getParent ()))
235- CodeSize += getUserCodeSizeBonus (UI, User, C);
247+ CodeSize += getCodeSizeSavingsForUser (UI, User, C);
236248
237249 return CodeSize;
238250}
@@ -819,6 +831,15 @@ static Function *cloneCandidateFunction(Function *F, unsigned NSpecs) {
819831 return Clone;
820832}
821833
834+ static unsigned getCostValue (const Cost &C) {
835+ int64_t Value = *C.getValue ();
836+
837+ assert (Value >= 0 && " CodeSize and Latency cannot be negative" );
838+ // It is safe to down cast since we know the arguments cannot be negative and
839+ // Cost is of type int64_t.
840+ return static_cast <unsigned >(Value);
841+ }
842+
822843bool FunctionSpecializer::findSpecializations (Function *F, unsigned FuncSize,
823844 SmallVectorImpl<Spec> &AllSpecs,
824845 SpecMap &SM) {
@@ -889,18 +910,20 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
889910 unsigned Score = 0 ;
890911 InstCostVisitor Visitor = getInstCostVisitorFor (F);
891912 for (ArgInfo &A : S.Args ) {
892- CodeSize += Visitor.getCodeSizeBonus (A.Formal , A.Actual );
913+ CodeSize += Visitor.getCodeSizeSavingsForArg (A.Formal , A.Actual );
893914 Score += getInliningBonus (A.Formal , A.Actual );
894915 }
895- CodeSize += Visitor.getCodeSizeBonusFromPendingPHIs ();
916+ CodeSize += Visitor.getCodeSizeSavingsFromPendingPHIs ();
896917
897918 LLVM_DEBUG (dbgs () << " FnSpecialization: Specialization bonus {CodeSize = "
898919 << CodeSize << " , Inlining = " << Score << " }\n " );
899920
900- Bonus B = {CodeSize, 0 };
901- FunctionGrowth[F] += FuncSize - B.CodeSize ;
921+ unsigned LatencySavings = 0 ;
922+ unsigned CodeSizeSavings = getCostValue (CodeSize);
923+ FunctionGrowth[F] += FuncSize - CodeSizeSavings;
902924
903- auto IsProfitable = [](Bonus &B, unsigned Score, unsigned FuncSize,
925+ auto IsProfitable = [](unsigned CodeSizeSavings, unsigned &LatencySavings,
926+ unsigned Score, unsigned FuncSize,
904927 unsigned FuncGrowth, InstCostVisitor &V) -> bool {
905928 // No check required.
906929 if (ForceSpecialization)
@@ -909,18 +932,18 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
909932 if (Score > MinInliningBonus * FuncSize / 100 )
910933 return true ;
911934 // Minimum codesize savings.
912- if (B. CodeSize < MinCodeSizeSavings * FuncSize / 100 )
935+ if (CodeSizeSavings < MinCodeSizeSavings * FuncSize / 100 )
913936 return false ;
914937
915938 // Lazily compute the Latency, to avoid unnecessarily computing BFI.
916- B += { 0 , V. getLatencyBonus ()} ;
939+ LatencySavings = getCostValue (V. getLatencySavingsForKnownConstants ()) ;
917940
918941 LLVM_DEBUG (
919942 dbgs () << " FnSpecialization: Specialization bonus {Latency = "
920- << B. Latency << " }\n " );
943+ << LatencySavings << " }\n " );
921944
922945 // Minimum latency savings.
923- if (B. Latency < MinLatencySavings * FuncSize / 100 )
946+ if (LatencySavings < MinLatencySavings * FuncSize / 100 )
924947 return false ;
925948 // Maximum codesize growth.
926949 if (FuncGrowth / FuncSize > MaxCodeSizeGrowth)
@@ -929,11 +952,12 @@ bool FunctionSpecializer::findSpecializations(Function *F, unsigned FuncSize,
929952 };
930953
931954 // Discard unprofitable specialisations.
932- if (!IsProfitable (B, Score, FuncSize, FunctionGrowth[F], Visitor))
955+ if (!IsProfitable (CodeSizeSavings, LatencySavings, Score, FuncSize,
956+ FunctionGrowth[F], Visitor))
933957 continue ;
934958
935959 // Create a new specialisation entry.
936- Score += std::max (B. CodeSize , B. Latency );
960+ Score += std::max (CodeSizeSavings, LatencySavings );
937961 auto &Spec = AllSpecs.emplace_back (F, S, Score);
938962 if (CS.getFunction () != F)
939963 Spec.CallSites .push_back (&CS);
0 commit comments