Skip to content

Commit 1d0aa6c

Browse files
authored
[BOLT] Fix impute-fall-throughs (#166305)
BOLT expects pre-aggregated profile entries to be unique, which holds for externally aggregated traces (or branches+fall-through ranges). Therefore, BOLT doesn't merge duplicate entries for faster processing. However, such traces are not expressly prohibited and could come from concatenated pre-aggregated profiles or otherwise. Relax the assumption about no duplicate (branch-only) traces in fall- through imputing. Test Plan: updated callcont-fallthru.s
1 parent 15b19c7 commit 1d0aa6c

File tree

2 files changed

+16
-7
lines changed

2 files changed

+16
-7
lines changed

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -564,30 +564,30 @@ void DataAggregator::imputeFallThroughs() {
564564
// Skip fall-throughs in external code.
565565
if (Trace.From == Trace::EXTERNAL)
566566
continue;
567-
std::pair CurrentBranch(Trace.Branch, Trace.From);
567+
if (std::pair CurrentBranch(Trace.Branch, Trace.From);
568+
CurrentBranch != PrevBranch) {
569+
// New group: reset aggregates.
570+
AggregateCount = AggregateFallthroughSize = 0;
571+
PrevBranch = CurrentBranch;
572+
}
568573
// BR_ONLY must be the last trace in the group
569574
if (Trace.To == Trace::BR_ONLY) {
570575
// If the group is not empty, use aggregate values, otherwise 0-length
571576
// for unconditional jumps (call/ret/uncond branch) or 1-length for others
572577
uint64_t InferredBytes =
573-
PrevBranch == CurrentBranch
578+
AggregateFallthroughSize
574579
? AggregateFallthroughSize / AggregateCount
575580
: !checkUnconditionalControlTransfer(Trace.From);
576581
Trace.To = Trace.From + InferredBytes;
577582
LLVM_DEBUG(dbgs() << "imputed " << Trace << " (" << InferredBytes
578583
<< " bytes)\n");
579584
++InferredTraces;
580585
} else {
581-
// Trace with a valid fall-through
582-
// New group: reset aggregates.
583-
if (CurrentBranch != PrevBranch)
584-
AggregateCount = AggregateFallthroughSize = 0;
585586
// Only use valid fall-through lengths
586587
if (Trace.To != Trace::EXTERNAL)
587588
AggregateFallthroughSize += (Trace.To - Trace.From) * Info.TakenCount;
588589
AggregateCount += Info.TakenCount;
589590
}
590-
PrevBranch = CurrentBranch;
591591
}
592592
if (opts::Verbosity >= 1)
593593
outs() << "BOLT-INFO: imputed " << InferredTraces << " traces\n";

bolt/test/X86/callcont-fallthru.s

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@
1515
# External return to a landing pad/entry point call continuation
1616
# RUN: link_fdata %s %t %t.pa-eret PREAGG-ERET
1717
# RUN-DISABLED: link_fdata %s %t %t.pa-plt PREAGG-PLT
18+
## Fall-through imputing test cases
19+
# RUN: link_fdata %s %t %t.pa-imp PREAGG-IMP
1820

1921
# RUN: llvm-strip --strip-unneeded %t -o %t.strip
2022
# RUN: llvm-objcopy --remove-section=.eh_frame %t.strip %t.noeh
@@ -63,6 +65,11 @@
6365
# RUN-DISABLED: --check-prefix=CHECK-PLT
6466
# CHECK-PLT: traces mismatching disassembled function contents: 0
6567

68+
## Check --impute-trace-fall-throughs accepting duplicate branch-only traces
69+
# RUN: perf2bolt %t --pa -p %t.pa-imp -o %t.pa-imp.fdata --impute-trace-fall-through
70+
# RUN: FileCheck %s --check-prefix=CHECK-IMP --input-file %t.pa-imp.fdata
71+
# CHECK-IMP: 0 [unknown] 0 1 main {{.*}} 0 3
72+
6673
.globl foo
6774
.type foo, %function
6875
foo:
@@ -102,6 +109,8 @@ Ltmp1:
102109

103110
Ltmp4:
104111
cmpl $0x0, -0x14(%rbp)
112+
# PREAGG-IMP: B X:0 #Ltmp4_br# 1 0
113+
# PREAGG-IMP: B X:0 #Ltmp4_br# 2 0
105114
Ltmp4_br:
106115
je Ltmp0
107116

0 commit comments

Comments
 (0)