Skip to content

Commit cc53b8c

Browse files
maksfbmemfrob
authored andcommitted
[BOLT] Fix implementation for TSP solution
Summary: Fix a bug in reconstruction of an optimal path. When calculating the best path we need to take into account a path from new "last" node to the current last node. Add "-tsp-threshold" (defaults to 10) to control when the TSP algorithm should be used. (cherry picked from FBD6253461)
1 parent 2516b0a commit cc53b8c

File tree

4 files changed

+59
-49
lines changed

4 files changed

+59
-49
lines changed

bolt/BinaryBasicBlock.h

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class BinaryBasicBlock {
9090
unsigned Index{InvalidIndex};
9191

9292
/// Index in the current layout.
93-
unsigned LayoutIndex{InvalidIndex};
93+
mutable unsigned LayoutIndex{InvalidIndex};
9494

9595
/// Number of pseudo instructions in this block.
9696
uint32_t NumPseudos{0};
@@ -778,6 +778,19 @@ class BinaryBasicBlock {
778778
/// Returns an estimate of size of basic block during run time.
779779
uint64_t estimateSize() const;
780780

781+
/// Return index in the current layout. The user is responsible for
782+
/// making sure the indices are up to date,
783+
/// e.g. by calling BinaryFunction::updateLayoutIndices();
784+
unsigned getLayoutIndex() const {
785+
assert(isValid());
786+
return LayoutIndex;
787+
}
788+
789+
/// Set layout index. To be used by BinaryFunction.
790+
void setLayoutIndex(unsigned Index) const {
791+
LayoutIndex = Index;
792+
}
793+
781794
private:
782795
void adjustNumPseudos(const MCInst &Inst, int Sign);
783796

@@ -815,19 +828,6 @@ class BinaryBasicBlock {
815828
void setIndex(unsigned I) {
816829
Index = I;
817830
}
818-
819-
/// Return index in the current layout. The user is responsible for
820-
/// making sure the indices are up to date,
821-
/// e.g. by calling BinaryFunction::updateLayoutIndices();
822-
unsigned getLayoutIndex() const {
823-
assert(isValid());
824-
return LayoutIndex;
825-
}
826-
827-
/// Set layout index. To be used by BinaryFunction.
828-
void setLayoutIndex(unsigned Index) {
829-
LayoutIndex = Index;
830-
}
831831
};
832832

833833
bool operator<(const BinaryBasicBlock &LHS, const BinaryBasicBlock &RHS);

bolt/Passes/BinaryPasses.cpp

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,15 @@ SplitEH("split-eh",
169169
cl::Hidden,
170170
cl::cat(BoltOptCategory));
171171

172+
static cl::opt<unsigned>
173+
TSPThreshold("tsp-threshold",
174+
cl::desc("maximum number of hot basic blocks in a function for which to use "
175+
"a precise TSP solution while re-ordering basic blocks"),
176+
cl::init(10),
177+
cl::ZeroOrMore,
178+
cl::Hidden,
179+
cl::cat(BoltOptCategory));
180+
172181
} // namespace opts
173182

174183
namespace llvm {
@@ -389,8 +398,7 @@ void ReorderBasicBlocks::modifyFunctionLayout(BinaryFunction &BF,
389398

390399
if (Type == LT_REVERSE) {
391400
Algo.reset(new ReverseReorderAlgorithm());
392-
}
393-
else if (BF.size() <= FUNC_SIZE_THRESHOLD && Type != LT_OPTIMIZE_SHUFFLE) {
401+
} else if (BF.size() <= opts::TSPThreshold && Type != LT_OPTIMIZE_SHUFFLE) {
394402
// Work on optimal solution if problem is small enough
395403
DEBUG(dbgs() << "finding optimal block layout for " << BF << "\n");
396404
Algo.reset(new OptimalReorderAlgorithm());

bolt/Passes/BinaryPasses.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -174,10 +174,6 @@ class ReorderBasicBlocks : public BinaryFunctionPass {
174174
};
175175

176176
private:
177-
// Function size, in number of BBs, above which we fallback to a heuristic
178-
// solution to the layout problem instead of seeking the optimal one.
179-
static constexpr uint64_t FUNC_SIZE_THRESHOLD = 10;
180-
181177
void modifyFunctionLayout(BinaryFunction &Function,
182178
LayoutType Type,
183179
bool MinBranchClusters,

bolt/Passes/ReorderAlgorithm.cpp

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -396,24 +396,26 @@ void MinBranchGreedyClusterAlgorithm::reset() {
396396
}
397397

398398
void OptimalReorderAlgorithm::reorderBasicBlocks(
399-
const BinaryFunction &BF, BasicBlockOrder &Order) const {
399+
const BinaryFunction &BF, BasicBlockOrder &Order) const {
400400
std::vector<std::vector<uint64_t>> Weight;
401-
std::unordered_map<const BinaryBasicBlock *, int> BBToIndex;
402401
std::vector<BinaryBasicBlock *> IndexToBB;
403402

404-
unsigned N = BF.layout_size();
403+
const auto N = BF.layout_size();
404+
assert(N <= std::numeric_limits<uint64_t>::digits &&
405+
"cannot use TSP solution for sizes larger than bits in uint64_t");
406+
405407
// Populating weight map and index map
406-
for (auto BB : BF.layout()) {
407-
BBToIndex[BB] = IndexToBB.size();
408+
for (auto *BB : BF.layout()) {
409+
BB->setLayoutIndex(IndexToBB.size());
408410
IndexToBB.push_back(BB);
409411
}
410412
Weight.resize(N);
411-
for (auto BB : BF.layout()) {
413+
for (auto *BB : BF.layout()) {
412414
auto BI = BB->branch_info_begin();
413-
Weight[BBToIndex[BB]].resize(N);
414-
for (auto I : BB->successors()) {
415+
Weight[BB->getLayoutIndex()].resize(N);
416+
for (auto *SuccBB : BB->successors()) {
415417
if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
416-
Weight[BBToIndex[BB]][BBToIndex[I]] = BI->Count;
418+
Weight[BB->getLayoutIndex()][SuccBB->getLayoutIndex()] = BI->Count;
417419
++BI;
418420
}
419421
}
@@ -427,26 +429,26 @@ void OptimalReorderAlgorithm::reorderBasicBlocks(
427429
DP[1][0] = 0;
428430
// Walk through TSP solutions using a bitmask to represent state (current set
429431
// of BBs in the layout)
430-
unsigned BestSet = 1;
431-
unsigned BestLast = 0;
432+
uint64_t BestSet = 1;
433+
uint64_t BestLast = 0;
432434
int64_t BestWeight = 0;
433-
for (unsigned Set = 1; Set < (1U << N); ++Set) {
435+
for (uint64_t Set = 1; Set < (1ULL << N); ++Set) {
434436
// Traverse each possibility of Last BB visited in this layout
435-
for (unsigned Last = 0; Last < N; ++Last) {
437+
for (uint64_t Last = 0; Last < N; ++Last) {
436438
// Case 1: There is no possible layout with this BB as Last
437439
if (DP[Set][Last] == -1)
438440
continue;
439441

440442
// Case 2: There is a layout with this Set and this Last, and we try
441443
// to expand this set with New
442-
for (unsigned New = 1; New < N; ++New) {
444+
for (uint64_t New = 1; New < N; ++New) {
443445
// Case 2a: BB "New" is already in this Set
444-
if ((Set & (1 << New)) != 0)
446+
if ((Set & (1ULL << New)) != 0)
445447
continue;
446448

447449
// Case 2b: BB "New" is not in this set and we add it to this Set and
448450
// record total weight of this layout with "New" as the last BB.
449-
unsigned NewSet = (Set | (1 << New));
451+
uint64_t NewSet = (Set | (1ULL << New));
450452
if (DP[NewSet][New] == -1)
451453
DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New];
452454
DP[NewSet][New] = std::max(DP[NewSet][New],
@@ -462,38 +464,42 @@ void OptimalReorderAlgorithm::reorderBasicBlocks(
462464
}
463465

464466
// Define final function layout based on layout that maximizes weight
465-
unsigned Last = BestLast;
466-
unsigned Set = BestSet;
467+
uint64_t Last = BestLast;
468+
uint64_t Set = BestSet;
467469
std::vector<bool> Visited;
468470
Visited.resize(N);
469471
Visited[Last] = true;
470472
Order.push_back(IndexToBB[Last]);
471-
Set = Set & ~(1U << Last);
473+
Set = Set & ~(1ULL << Last);
472474
while (Set != 0) {
473475
int64_t Best = -1;
474-
for (unsigned I = 0; I < N; ++I) {
476+
uint64_t NewLast;
477+
for (uint64_t I = 0; I < N; ++I) {
475478
if (DP[Set][I] == -1)
476479
continue;
477-
if (DP[Set][I] > Best) {
478-
Last = I;
479-
Best = DP[Set][I];
480+
int64_t AdjWeight = Weight[I][Last] > 0 ? Weight[I][Last] : 0;
481+
if (DP[Set][I] + AdjWeight > Best) {
482+
NewLast = I;
483+
Best = DP[Set][I] + AdjWeight;
480484
}
481485
}
486+
Last = NewLast;
482487
Visited[Last] = true;
483488
Order.push_back(IndexToBB[Last]);
484-
Set = Set & ~(1U << Last);
489+
Set = Set & ~(1ULL << Last);
485490
}
486491
std::reverse(Order.begin(), Order.end());
487492

488-
// Finalize layout with BBs that weren't assigned to the layout
489-
for (auto BB : BF.layout()) {
490-
if (Visited[BBToIndex[BB]] == false)
493+
// Finalize layout with BBs that weren't assigned to the layout using the
494+
// input layout.
495+
for (auto *BB : BF.layout()) {
496+
if (Visited[BB->getLayoutIndex()] == false)
491497
Order.push_back(BB);
492498
}
493499
}
494500

495501
void OptimizeReorderAlgorithm::reorderBasicBlocks(
496-
const BinaryFunction &BF, BasicBlockOrder &Order) const {
502+
const BinaryFunction &BF, BasicBlockOrder &Order) const {
497503
if (BF.layout_empty())
498504
return;
499505

@@ -509,7 +515,7 @@ void OptimizeReorderAlgorithm::reorderBasicBlocks(
509515
}
510516

511517
void OptimizeBranchReorderAlgorithm::reorderBasicBlocks(
512-
const BinaryFunction &BF, BasicBlockOrder &Order) const {
518+
const BinaryFunction &BF, BasicBlockOrder &Order) const {
513519
if (BF.layout_empty())
514520
return;
515521

0 commit comments

Comments
 (0)