@@ -883,84 +883,6 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,
883
883
}
884
884
}
885
885
886
- struct WeightInfo {
887
- // Weights for current iteration.
888
- SmallVector<uint32_t > Weights;
889
- // Weights to subtract after each iteration.
890
- const SmallVector<uint32_t > SubWeights;
891
- };
892
-
893
- // / Update the branch weights of an exiting block of a peeled-off loop
894
- // / iteration.
895
- // / Let F be the weight of the edge to continue (fallthrough) into the loop.
896
- // / Let E be the weight of the edge to an exit.
897
- // / F/(F+E) is the probability to go to the loop and E/(F+E) is the probability
898
- // / to go to the exit.
899
- // / Then, Estimated ExitCount = F / E.
900
- // / For the I-th (counting from 0) peeled-off iteration we set the weights for
901
- // / the peeled exit to (EC - I, 1). This gives us a reasonable distribution:
902
- // / the probability to go to the exit, 1/(EC-I), increases. At the same time,
903
- // / the estimated exit count in the remainder loop is reduced by I.
904
- // / To avoid dealing with division rounding we can just multiply both parts
905
- // / of the weights by E and use the weights (F - I * E, E).
906
- static void updateBranchWeights (Instruction *Term, WeightInfo &Info) {
907
- setBranchWeights (*Term, Info.Weights , /* IsExpected=*/ false );
908
- for (auto [Idx, SubWeight] : enumerate(Info.SubWeights ))
909
- if (SubWeight != 0 )
910
- // Don't set the probability of taking the edge from latch to loop header
911
- // to less than 1:1 ratio (meaning Weight should not be lower than
912
- // SubWeight), as this could significantly reduce the loop's hotness,
913
- // which would be incorrect in the case of underestimating the trip count.
914
- Info.Weights [Idx] =
915
- Info.Weights [Idx] > SubWeight
916
- ? std::max (Info.Weights [Idx] - SubWeight, SubWeight)
917
- : SubWeight;
918
- }
919
-
920
- // / Initialize the weights for all exiting blocks.
921
- static void initBranchWeights (DenseMap<Instruction *, WeightInfo> &WeightInfos,
922
- Loop *L) {
923
- SmallVector<BasicBlock *> ExitingBlocks;
924
- L->getExitingBlocks (ExitingBlocks);
925
- for (BasicBlock *ExitingBlock : ExitingBlocks) {
926
- Instruction *Term = ExitingBlock->getTerminator ();
927
- SmallVector<uint32_t > Weights;
928
- if (!extractBranchWeights (*Term, Weights))
929
- continue ;
930
-
931
- // See the comment on updateBranchWeights() for an explanation of what we
932
- // do here.
933
- uint32_t FallThroughWeights = 0 ;
934
- uint32_t ExitWeights = 0 ;
935
- for (auto [Succ, Weight] : zip (successors (Term), Weights)) {
936
- if (L->contains (Succ))
937
- FallThroughWeights += Weight;
938
- else
939
- ExitWeights += Weight;
940
- }
941
-
942
- // Don't try to update weights for degenerate case.
943
- if (FallThroughWeights == 0 )
944
- continue ;
945
-
946
- SmallVector<uint32_t > SubWeights;
947
- for (auto [Succ, Weight] : zip (successors (Term), Weights)) {
948
- if (!L->contains (Succ)) {
949
- // Exit weights stay the same.
950
- SubWeights.push_back (0 );
951
- continue ;
952
- }
953
-
954
- // Subtract exit weights on each iteration, distributed across all
955
- // fallthrough edges.
956
- double W = (double )Weight / (double )FallThroughWeights;
957
- SubWeights.push_back ((uint32_t )(ExitWeights * W));
958
- }
959
-
960
- WeightInfos.insert ({Term, {std::move (Weights), std::move (SubWeights)}});
961
- }
962
- }
963
-
964
886
// / Clones the body of the loop L, putting it between \p InsertTop and \p
965
887
// / InsertBot.
966
888
// / \param IterNumber The serial number of the iteration currently being
@@ -1332,11 +1254,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1332
1254
Instruction *LatchTerm =
1333
1255
cast<Instruction>(cast<BasicBlock>(Latch)->getTerminator ());
1334
1256
1335
- // If we have branch weight information, we'll want to update it for the
1336
- // newly created branches.
1337
- DenseMap<Instruction *, WeightInfo> Weights;
1338
- initBranchWeights (Weights, L);
1339
-
1340
1257
// Identify what noalias metadata is inside the loop: if it is inside the
1341
1258
// loop, the associated metadata must be cloned for each iteration.
1342
1259
SmallVector<MDNode *, 6 > LoopLocalNoAliasDeclScopes;
@@ -1382,11 +1299,6 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1382
1299
assert (DT.verify (DominatorTree::VerificationLevel::Fast));
1383
1300
#endif
1384
1301
1385
- for (auto &[Term, Info] : Weights) {
1386
- auto *TermCopy = cast<Instruction>(VMap[Term]);
1387
- updateBranchWeights (TermCopy, Info);
1388
- }
1389
-
1390
1302
// Remove Loop metadata from the latch branch instruction
1391
1303
// because it is not the Loop's latch branch anymore.
1392
1304
auto *LatchTermCopy = cast<Instruction>(VMap[LatchTerm]);
@@ -1426,15 +1338,38 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
1426
1338
}
1427
1339
}
1428
1340
1429
- for (const auto &[Term, Info] : Weights) {
1430
- setBranchWeights (*Term, Info.Weights , /* IsExpected=*/ false );
1431
- }
1432
-
1433
1341
// Update Metadata for count of peeled off iterations.
1434
1342
unsigned AlreadyPeeled = 0 ;
1435
1343
if (auto Peeled = getOptionalIntLoopAttribute (L, PeeledCountMetaData))
1436
1344
AlreadyPeeled = *Peeled;
1437
- addStringMetadataToLoop (L, PeeledCountMetaData, AlreadyPeeled + PeelCount);
1345
+ unsigned TotalPeeled = AlreadyPeeled + PeelCount;
1346
+ addStringMetadataToLoop (L, PeeledCountMetaData, TotalPeeled);
1347
+
1348
+ // Update metadata for the estimated trip count. The original branch weight
1349
+ // metadata is already correct for both the remaining loop and the peeled loop
1350
+ // iterations, so do not adjust it.
1351
+ //
1352
+ // For example, consider what happens when peeling 2 iterations from a loop
1353
+ // with an estimated trip count of 10 and inserting them before the remaining
1354
+ // loop. Each of the peeled iterations and each iteration in the remaining
1355
+ // loop still has the same probability of exiting the *entire original* loop
1356
+ // as it did when in the original loop, and thus it should still have the same
1357
+ // branch weights. The peeled iterations' non-zero probabilities of exiting
1358
+ // already appropriately reduce the probability of reaching the remaining
1359
+ // iterations just as they did in the original loop. Trying to also adjust
1360
+ // the remaining loop's branch weights to reflect its new trip count of 8 will
1361
+ // erroneously further reduce its block frequencies. However, in case an
1362
+ // analysis later needs to determine the trip count of the remaining loop
1363
+ // while examining it in isolation without considering the probability of
1364
+ // actually reaching it, we store the new trip count as separate metadata.
1365
+ if (auto EstimatedTripCount = getLoopEstimatedTripCount (L)) {
1366
+ unsigned EstimatedTripCountNew = *EstimatedTripCount;
1367
+ if (EstimatedTripCountNew < TotalPeeled)
1368
+ EstimatedTripCountNew = 0 ;
1369
+ else
1370
+ EstimatedTripCountNew -= TotalPeeled;
1371
+ setLoopEstimatedTripCount (L, EstimatedTripCountNew);
1372
+ }
1438
1373
1439
1374
if (Loop *ParentLoop = L->getParentLoop ())
1440
1375
L = ParentLoop;
0 commit comments