Skip to content

Commit a5c1bc3

Browse files
committed
Rebase, address comments
Created using spr 1.3.5
2 parents afb5bd2 + 6f62757 commit a5c1bc3

File tree

2,561 files changed

+103696
-34284
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

2,561 files changed

+103696
-34284
lines changed

.github/workflows/libcxx-build-and-test.yaml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -146,6 +146,7 @@ jobs:
146146
'generic-no-experimental',
147147
'generic-no-filesystem',
148148
'generic-no-localization',
149+
'generic-no-terminal',
149150
'generic-no-random_device',
150151
'generic-no-threads',
151152
'generic-no-tzdb',

.mailmap

Lines changed: 1 addition & 1 deletion

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2502,7 +2502,7 @@ void BinaryFunction::annotateCFIState() {
25022502
}
25032503
}
25042504

2505-
if (!StateStack.empty()) {
2505+
if (opts::Verbosity >= 1 && !StateStack.empty()) {
25062506
BC.errs() << "BOLT-WARNING: non-empty CFI stack at the end of " << *this
25072507
<< '\n';
25082508
}

bolt/lib/Core/BinaryFunctionProfile.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -336,7 +336,8 @@ void BinaryFunction::inferFallThroughCounts() {
336336
if (SuccBI.Count == 0) {
337337
SuccBI.Count = Inferred;
338338
SuccBI.MispredictedCount = BinaryBasicBlock::COUNT_INFERRED;
339-
Succ->ExecutionCount += Inferred;
339+
Succ->ExecutionCount =
340+
std::max(Succ->getKnownExecutionCount(), Inferred);
340341
}
341342
}
342343
}

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -2415,17 +2415,15 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
24152415
Fragments.insert(BF);
24162416
for (const BinaryFunction *F : Fragments) {
24172417
const uint64_t FuncAddr = F->getAddress();
2418-
const auto &FragmentProbes =
2419-
llvm::make_range(ProbeMap.lower_bound(FuncAddr),
2420-
ProbeMap.lower_bound(FuncAddr + F->getSize()));
2421-
for (const auto &[OutputAddress, Probes] : FragmentProbes) {
2418+
for (const MCDecodedPseudoProbe &Probe :
2419+
ProbeMap.find(FuncAddr, FuncAddr + F->getSize())) {
2420+
const uint32_t OutputAddress = Probe.getAddress();
24222421
const uint32_t InputOffset = BAT->translate(
24232422
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
24242423
const unsigned BlockIndex = getBlock(InputOffset).second;
2425-
for (const MCDecodedPseudoProbe &Probe : Probes)
2426-
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
2427-
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
2428-
Probe.getType()});
2424+
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
2425+
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
2426+
Probe.getType()});
24292427
}
24302428
}
24312429
}

bolt/lib/Profile/YAMLProfileWriter.cpp

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -193,13 +193,10 @@ YAMLProfileWriter::convert(const BinaryFunction &BF, bool UseDFS,
193193
const uint64_t FuncAddr = BF.getAddress();
194194
const std::pair<uint64_t, uint64_t> &BlockRange =
195195
BB->getInputAddressRange();
196-
const auto &BlockProbes =
197-
llvm::make_range(ProbeMap.lower_bound(FuncAddr + BlockRange.first),
198-
ProbeMap.lower_bound(FuncAddr + BlockRange.second));
199-
for (const auto &[_, Probes] : BlockProbes)
200-
for (const MCDecodedPseudoProbe &Probe : Probes)
201-
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
202-
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
196+
for (const MCDecodedPseudoProbe &Probe : ProbeMap.find(
197+
FuncAddr + BlockRange.first, FuncAddr + BlockRange.second))
198+
YamlBB.PseudoProbes.emplace_back(yaml::bolt::PseudoProbeInfo{
199+
Probe.getGuid(), Probe.getIndex(), Probe.getType()});
203200
}
204201

205202
YamlBF.Blocks.emplace_back(YamlBB);

bolt/lib/Rewrite/PseudoProbeRewriter.cpp

Lines changed: 53 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -143,7 +143,6 @@ void PseudoProbeRewriter::parsePseudoProbe() {
143143
if (!ProbeDecoder.buildAddress2ProbeMap(
144144
reinterpret_cast<const uint8_t *>(Contents.data()), Contents.size(),
145145
GuidFilter, FuncStartAddrs)) {
146-
ProbeDecoder.getAddress2ProbesMap().clear();
147146
errs() << "BOLT-WARNING: fail in building Address2ProbeMap\n";
148147
return;
149148
}
@@ -156,7 +155,8 @@ void PseudoProbeRewriter::parsePseudoProbe() {
156155
ProbeDecoder.printProbesForAllAddresses(outs());
157156
}
158157

159-
for (const auto &[GUID, FuncDesc] : ProbeDecoder.getGUID2FuncDescMap()) {
158+
for (const auto &FuncDesc : ProbeDecoder.getGUID2FuncDescMap()) {
159+
uint64_t GUID = FuncDesc.FuncGUID;
160160
if (!FuncStartAddrs.contains(GUID))
161161
continue;
162162
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(FuncStartAddrs[GUID]);
@@ -174,59 +174,50 @@ void PseudoProbeRewriter::updatePseudoProbes() {
174174
AddressProbesMap &Address2ProbesMap = ProbeDecoder.getAddress2ProbesMap();
175175
const GUIDProbeFunctionMap &GUID2Func = ProbeDecoder.getGUID2FuncDescMap();
176176

177-
for (auto &AP : Address2ProbesMap) {
178-
BinaryFunction *F = BC.getBinaryFunctionContainingAddress(AP.first);
177+
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
178+
uint64_t Address = Probe.getAddress();
179+
BinaryFunction *F = BC.getBinaryFunctionContainingAddress(Address);
179180
// If F is removed, eliminate all probes inside it from inline tree
180181
// Setting probes' addresses as INT64_MAX means elimination
181182
if (!F) {
182-
for (MCDecodedPseudoProbe &Probe : AP.second)
183-
Probe.setAddress(INT64_MAX);
183+
Probe.setAddress(INT64_MAX);
184184
continue;
185185
}
186186
// If F is not emitted, the function will remain in the same address as its
187187
// input
188188
if (!F->isEmitted())
189189
continue;
190190

191-
uint64_t Offset = AP.first - F->getAddress();
191+
uint64_t Offset = Address - F->getAddress();
192192
const BinaryBasicBlock *BB = F->getBasicBlockContainingOffset(Offset);
193193
uint64_t BlkOutputAddress = BB->getOutputAddressRange().first;
194194
// Check if block output address is defined.
195195
// If not, such block is removed from binary. Then remove the probes from
196196
// inline tree
197197
if (BlkOutputAddress == 0) {
198-
for (MCDecodedPseudoProbe &Probe : AP.second)
199-
Probe.setAddress(INT64_MAX);
198+
Probe.setAddress(INT64_MAX);
200199
continue;
201200
}
202201

203-
unsigned ProbeTrack = AP.second.size();
204-
std::list<MCDecodedPseudoProbe>::iterator Probe = AP.second.begin();
205-
while (ProbeTrack != 0) {
206-
if (Probe->isBlock()) {
207-
Probe->setAddress(BlkOutputAddress);
208-
} else if (Probe->isCall()) {
209-
// A call probe may be duplicated due to ICP
210-
// Go through output of InputOffsetToAddressMap to collect all related
211-
// probes
212-
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(AP.first);
213-
auto CallOutputAddress = CallOutputAddresses.first;
214-
if (CallOutputAddress == CallOutputAddresses.second) {
215-
Probe->setAddress(INT64_MAX);
216-
} else {
217-
Probe->setAddress(CallOutputAddress->second);
218-
CallOutputAddress = std::next(CallOutputAddress);
219-
}
220-
221-
while (CallOutputAddress != CallOutputAddresses.second) {
222-
AP.second.push_back(*Probe);
223-
AP.second.back().setAddress(CallOutputAddress->second);
224-
Probe->getInlineTreeNode()->addProbes(&(AP.second.back()));
225-
CallOutputAddress = std::next(CallOutputAddress);
226-
}
202+
if (Probe.isBlock()) {
203+
Probe.setAddress(BlkOutputAddress);
204+
} else if (Probe.isCall()) {
205+
// A call probe may be duplicated due to ICP
206+
// Go through output of InputOffsetToAddressMap to collect all related
207+
// probes
208+
auto CallOutputAddresses = BC.getIOAddressMap().lookupAll(Address);
209+
auto CallOutputAddress = CallOutputAddresses.first;
210+
if (CallOutputAddress == CallOutputAddresses.second) {
211+
Probe.setAddress(INT64_MAX);
212+
} else {
213+
Probe.setAddress(CallOutputAddress->second);
214+
CallOutputAddress = std::next(CallOutputAddress);
215+
}
216+
217+
while (CallOutputAddress != CallOutputAddresses.second) {
218+
ProbeDecoder.addInjectedProbe(Probe, CallOutputAddress->second);
219+
CallOutputAddress = std::next(CallOutputAddress);
227220
}
228-
Probe = std::next(Probe);
229-
ProbeTrack--;
230221
}
231222
}
232223

@@ -242,22 +233,16 @@ void PseudoProbeRewriter::updatePseudoProbes() {
242233
BinaryBlock.getName();
243234

244235
// scan all addresses -> correlate probe to block when print out
245-
std::vector<uint64_t> Addresses;
246-
for (auto &Entry : Address2ProbesMap)
247-
Addresses.push_back(Entry.first);
248-
llvm::sort(Addresses);
249-
for (uint64_t Key : Addresses) {
250-
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap[Key]) {
251-
if (Probe.getAddress() == INT64_MAX)
252-
outs() << "Deleted Probe: ";
253-
else
254-
outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
255-
Probe.print(outs(), GUID2Func, true);
256-
// print block name only if the probe is block type and undeleted.
257-
if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
258-
outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
259-
<< Addr2BlockNames[Probe.getAddress()] << "\n";
260-
}
236+
for (MCDecodedPseudoProbe &Probe : Address2ProbesMap) {
237+
if (Probe.getAddress() == INT64_MAX)
238+
outs() << "Deleted Probe: ";
239+
else
240+
outs() << "Address: " << format_hex(Probe.getAddress(), 8) << " ";
241+
Probe.print(outs(), GUID2Func, true);
242+
// print block name only if the probe is block type and undeleted.
243+
if (Probe.isBlock() && Probe.getAddress() != INT64_MAX)
244+
outs() << format_hex(Probe.getAddress(), 8) << " Probe is in "
245+
<< Addr2BlockNames[Probe.getAddress()] << "\n";
261246
}
262247
outs() << "=======================================\n";
263248
}
@@ -333,7 +318,7 @@ void PseudoProbeRewriter::encodePseudoProbes() {
333318
ProbeDecoder.getDummyInlineRoot();
334319
for (auto Child = Root.getChildren().begin();
335320
Child != Root.getChildren().end(); ++Child)
336-
Inlinees[Child->first] = Child->second.get();
321+
Inlinees[Child->getInlineSite()] = &*Child;
337322

338323
for (auto Inlinee : Inlinees)
339324
// INT64_MAX is "placeholder" of unused callsite index field in the pair
@@ -359,25 +344,37 @@ void PseudoProbeRewriter::encodePseudoProbes() {
359344
EmitInt(Cur->Guid, 8);
360345
// Emit number of probes in this node
361346
uint64_t Deleted = 0;
362-
for (MCDecodedPseudoProbe *&Probe : Cur->getProbes())
347+
for (MCDecodedPseudoProbe *&Probe :
348+
llvm::make_pointer_range(Cur->getProbes()))
363349
if (Probe->getAddress() == INT64_MAX)
364350
Deleted++;
365351
LLVM_DEBUG(dbgs() << "Deleted Probes:" << Deleted << "\n");
366-
uint64_t ProbesSize = Cur->getProbes().size() - Deleted;
352+
size_t InjectedProbes = ProbeDecoder.getNumInjectedProbes(Cur);
353+
uint64_t ProbesSize = Cur->getProbes().size() - Deleted + InjectedProbes;
367354
EmitULEB128IntValue(ProbesSize);
368355
// Emit number of direct inlinees
369356
EmitULEB128IntValue(Cur->getChildren().size());
370357
// Emit probes in this group
371-
for (MCDecodedPseudoProbe *&Probe : Cur->getProbes()) {
358+
for (MCDecodedPseudoProbe *&Probe :
359+
llvm::make_pointer_range(Cur->getProbes())) {
372360
if (Probe->getAddress() == INT64_MAX)
373361
continue;
374362
EmitDecodedPseudoProbe(Probe);
375363
LastProbe = Probe;
376364
}
365+
if (InjectedProbes) {
366+
for (MCDecodedPseudoProbe *&Probe :
367+
llvm::make_pointer_range(ProbeDecoder.getInjectedProbes(Cur))) {
368+
if (Probe->getAddress() == INT64_MAX)
369+
continue;
370+
EmitDecodedPseudoProbe(Probe);
371+
LastProbe = Probe;
372+
}
373+
}
377374

378375
for (auto Child = Cur->getChildren().begin();
379376
Child != Cur->getChildren().end(); ++Child)
380-
Inlinees[Child->first] = Child->second.get();
377+
Inlinees[Child->getInlineSite()] = &*Child;
381378
for (const auto &Inlinee : Inlinees) {
382379
assert(Cur->Guid != 0 && "non root tree node must have nonzero Guid");
383380
NextNodes.push_back({std::get<1>(Inlinee.first), Inlinee.second});

bolt/test/X86/end-symbol.test

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
# RUN: yaml2obj %p/Inputs/plt-sec.yaml &> %t.exe
22
# RUN: llvm-bolt %t.exe -o %t.out
3-
# RUN: (llvm-readelf --program-headers %t.out | grep LOAD | tail -n 1 ; llvm-nm %t.out) \
4-
# RUN: | FileCheck %s
3+
4+
# RUN: llvm-readelf --program-headers %t.out | grep LOAD | tail -n 1 > %t.load
5+
# RUN: llvm-nm %t.out >> %t.load
6+
# RUN: FileCheck %s < %t.load
57

68
## Check that llvm-bolt correctly updates _end symbol to match the end of the
79
## last loadable segment.
Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,45 @@
1+
## Test that infer-fall-throughs correctly infers the fall-through edge count
2+
## that the profile records incorrectly (as 0) in the example below
3+
4+
# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
5+
# RUN: link_fdata %s %t.o %t.fdata
6+
# RUN: llvm-strip --strip-unneeded %t.o
7+
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
8+
# RUN: llvm-bolt %t.exe -o %t.bolt \
9+
# RUN: --print-estimate-edge-counts --data=%t.fdata \
10+
# RUN: 2>&1 | FileCheck --check-prefix=WITHOUTINFERENCE %s
11+
# RUN: llvm-bolt %t.exe -o %t.bolt --infer-fall-throughs \
12+
# RUN: --print-estimate-edge-counts --data=%t.fdata \
13+
# RUN: 2>&1 | FileCheck --check-prefix=CORRECTINFERENCE %s
14+
15+
16+
# WITHOUTINFERENCE: Binary Function "main" after estimate-edge-counts
17+
# WITHOUTINFERENCE: {{^\.Ltmp0}}
18+
# WITHOUTINFERENCE: Successors: .Ltmp1 (mispreds: 0, count: 10), .LFT0 (mispreds: 0, count: 0)
19+
# WITHOUTINFERENCE: {{^\.LFT0}}
20+
# WITHOUTINFERENCE: Exec Count : 490
21+
22+
# CORRECTINFERENCE: Binary Function "main" after estimate-edge-counts
23+
# CORRECTINFERENCE: {{^\.Ltmp0}}
24+
# CORRECTINFERENCE: Successors: .Ltmp1 (mispreds: 0, count: 10), .LFT0 (inferred count: 490)
25+
# CORRECTINFERENCE: {{^\.LFT0}}
26+
# CORRECTINFERENCE: Exec Count : 490
27+
28+
29+
.globl main
30+
.type main, @function
31+
main:
32+
LLmain_LLstart:
33+
jmp LLstart
34+
# FDATA: 1 main #LLmain_LLstart# 1 main #LLstart# 0 500
35+
LLstart:
36+
jge LLexit
37+
# FDATA: 1 main #LLstart# 1 main #LLexit# 0 10
38+
# FDATA: 1 main #LLstart# 1 main #LLmore# 0 0
39+
LLmore:
40+
movl $5, %eax
41+
# FDATA: 1 main #LLmore# 1 main #LLexit# 0 490
42+
LLexit:
43+
ret
44+
.LLmain_end:
45+
.size main, .LLmain_end-main

bolt/test/X86/instrumentation-eh_frame_hdr.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,9 @@
66
// RUN: %clangxx %cxxflags -static -Wl,-q %s -o %t.exe -Wl,--entry=_start
77
// RUN: llvm-bolt %t.exe -o %t.instr -instrument \
88
// RUN: --instrumentation-file=%t.fdata -instrumentation-sleep-time=1
9-
// RUN: (llvm-readelf -SW %t.instr | grep -v bolt; llvm-readelf -lW %t.instr | \
10-
// RUN: grep LOAD | tail -n 1) | FileCheck %s
9+
// RUN: llvm-readelf -SW %t.instr | grep -v bolt > %t.sections
10+
// RUN: llvm-readelf -lW %t.instr | grep LOAD | tail -n 1 >> %t.sections
11+
// RUN: FileCheck %s < %t.sections
1112

1213
// CHECK: {{.*}} .eh_frame_hdr PROGBITS [[#%x, EH_ADDR:]]
1314
// CHECK: LOAD 0x[[#%x, LD_OFFSET:]] 0x[[#%x, LD_VADDR:]] 0x[[#%x, LD_FSIZE:]]

0 commit comments

Comments
 (0)