Skip to content

Commit 03ca42c

Browse files
author
Alexander Yermolovich
committed
safe-icf
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D65799965
1 parent 911cee2 commit 03ca42c

25 files changed

+2431
-74
lines changed

bolt/include/bolt/Core/BinaryContext.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,12 @@ class BinaryContext {
282282
std::unique_ptr<DWARFContext> DwCtx,
283283
JournalingStreams Logger);
284284

285+
/// Returns addend of a relocation.
286+
static int64_t getRelocationAddend(const ELFObjectFileBase *Obj,
287+
const RelocationRef &Rel);
288+
/// Returns symbol of a relocation.
289+
static uint32_t getRelocationSymbol(const ELFObjectFileBase *Obj,
290+
const RelocationRef &Rel);
285291
/// Superset of compiler units that will contain overwritten code that needs
286292
/// new debug info. In a few cases, functions may end up not being
287293
/// overwritten, but it is okay to re-generate debug info for them.
@@ -1350,6 +1356,9 @@ class BinaryContext {
13501356
return Code.size();
13511357
}
13521358

1359+
/// Processes .text section to identify function references.
1360+
void processInstructionForFuncReferences(const MCInst &Inst);
1361+
13531362
/// Compute the native code size for a range of instructions.
13541363
/// Note: this can be imprecise wrt the final binary since happening prior to
13551364
/// relaxation, as well as wrt the original binary because of opcode

bolt/include/bolt/Core/BinaryFunction.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,9 @@ class BinaryFunction {
428428
/// Function order for streaming into the destination binary.
429429
uint32_t Index{-1U};
430430

431+
/// Indicates if the function is safe to fold.
432+
bool IsSafeToICF{true};
433+
431434
/// Get basic block index assuming it belongs to this function.
432435
unsigned getIndex(const BinaryBasicBlock *BB) const {
433436
assert(BB->getIndex() < BasicBlocks.size());
@@ -817,6 +820,12 @@ class BinaryFunction {
817820
return nullptr;
818821
}
819822

823+
/// Indicates if the function is safe to fold.
824+
bool isSafeToICF() const { return IsSafeToICF; }
825+
826+
/// Sets the function is not safe to fold.
827+
void setUnsetToICF() { IsSafeToICF = false; }
828+
820829
/// Returns the raw binary encoding of this function.
821830
ErrorOr<ArrayRef<uint8_t>> getData() const;
822831

bolt/include/bolt/Passes/IdenticalCodeFolding.h

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,21 @@ class IdenticalCodeFolding : public BinaryFunctionPass {
3131
}
3232

3333
public:
34-
explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass)
35-
: BinaryFunctionPass(PrintPass) {}
34+
explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass, bool IsSafeICF)
35+
: BinaryFunctionPass(PrintPass), IsSafeICF(IsSafeICF) {}
3636

3737
const char *getName() const override { return "identical-code-folding"; }
3838
Error runOnFunctions(BinaryContext &BC) override;
39+
40+
private:
41+
/// Create a skip list of functions that should not be folded.
42+
Error createFoldSkipList(BinaryContext &BC);
43+
/// Processes relocations in the .data section to identify function
44+
/// references.
45+
void processDataRelocations(BinaryContext &BC,
46+
const SectionRef &SecRefRelData);
47+
48+
bool IsSafeICF;
3949
};
4050

4151
} // namespace bolt

bolt/lib/Core/BinaryContext.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,63 @@ cl::opt<std::string> CompDirOverride(
7575
"location, which is used with DW_AT_dwo_name to construct a path "
7676
"to *.dwo files."),
7777
cl::Hidden, cl::init(""), cl::cat(BoltCategory));
78+
79+
cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
80+
cl::cat(BoltOptCategory));
81+
82+
cl::opt<bool> SafeICF("safe-icf",
83+
cl::desc("Enable safe identical code folding"),
84+
cl::cat(BoltOptCategory));
7885
} // namespace opts
7986

87+
namespace {
88+
template <typename ELFT>
89+
int64_t getRelocationAddend(const ELFObjectFile<ELFT> *Obj,
90+
const RelocationRef &RelRef) {
91+
using ELFShdrTy = typename ELFT::Shdr;
92+
using Elf_Rela = typename ELFT::Rela;
93+
int64_t Addend = 0;
94+
const ELFFile<ELFT> &EF = Obj->getELFFile();
95+
DataRefImpl Rel = RelRef.getRawDataRefImpl();
96+
const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
97+
switch (RelocationSection->sh_type) {
98+
default:
99+
llvm_unreachable("unexpected relocation section type");
100+
case ELF::SHT_REL:
101+
break;
102+
case ELF::SHT_RELA: {
103+
const Elf_Rela *RelA = Obj->getRela(Rel);
104+
Addend = RelA->r_addend;
105+
break;
106+
}
107+
}
108+
109+
return Addend;
110+
}
111+
112+
template <typename ELFT>
113+
uint32_t getRelocationSymbol(const ELFObjectFile<ELFT> *Obj,
114+
const RelocationRef &RelRef) {
115+
using ELFShdrTy = typename ELFT::Shdr;
116+
uint32_t Symbol = 0;
117+
const ELFFile<ELFT> &EF = Obj->getELFFile();
118+
DataRefImpl Rel = RelRef.getRawDataRefImpl();
119+
const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
120+
switch (RelocationSection->sh_type) {
121+
default:
122+
llvm_unreachable("unexpected relocation section type");
123+
case ELF::SHT_REL:
124+
Symbol = Obj->getRel(Rel)->getSymbol(EF.isMips64EL());
125+
break;
126+
case ELF::SHT_RELA:
127+
Symbol = Obj->getRela(Rel)->getSymbol(EF.isMips64EL());
128+
break;
129+
}
130+
131+
return Symbol;
132+
}
133+
} // anonymous namespace
134+
80135
namespace llvm {
81136
namespace bolt {
82137

@@ -156,6 +211,16 @@ BinaryContext::~BinaryContext() {
156211
clearBinaryData();
157212
}
158213

214+
uint32_t BinaryContext::getRelocationSymbol(const ELFObjectFileBase *Obj,
215+
const RelocationRef &Rel) {
216+
return ::getRelocationSymbol(cast<ELF64LEObjectFile>(Obj), Rel);
217+
}
218+
219+
int64_t BinaryContext::getRelocationAddend(const ELFObjectFileBase *Obj,
220+
const RelocationRef &Rel) {
221+
return ::getRelocationAddend(cast<ELF64LEObjectFile>(Obj), Rel);
222+
}
223+
159224
/// Create BinaryContext for a given architecture \p ArchName and
160225
/// triple \p TripleName.
161226
Expected<std::unique_ptr<BinaryContext>> BinaryContext::createBinaryContext(
@@ -1945,6 +2010,27 @@ static void printDebugInfo(raw_ostream &OS, const MCInst &Instruction,
19452010
OS << " discriminator:" << Row.Discriminator;
19462011
}
19472012

2013+
static bool skipInstruction(const MCInst &Inst, const BinaryContext &BC) {
2014+
const bool IsX86 = BC.isX86();
2015+
return (BC.MIB->isPseudo(Inst) || BC.MIB->isUnconditionalBranch(Inst) ||
2016+
(IsX86 && BC.MIB->isConditionalBranch(Inst)) ||
2017+
BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst));
2018+
}
2019+
void BinaryContext::processInstructionForFuncReferences(const MCInst &Inst) {
2020+
if (!opts::SafeICF || skipInstruction(Inst, *this))
2021+
return;
2022+
for (const MCOperand &Op : MCPlus::primeOperands(Inst)) {
2023+
if (Op.isExpr()) {
2024+
const MCExpr &Expr = *Op.getExpr();
2025+
if (Expr.getKind() == MCExpr::SymbolRef) {
2026+
const MCSymbol &Symbol = cast<MCSymbolRefExpr>(Expr).getSymbol();
2027+
if (BinaryFunction *BF = getFunctionForSymbol(&Symbol))
2028+
BF->setUnsetToICF();
2029+
}
2030+
}
2031+
}
2032+
}
2033+
19482034
void BinaryContext::printInstruction(raw_ostream &OS, const MCInst &Instruction,
19492035
uint64_t Offset,
19502036
const BinaryFunction *Function,

bolt/lib/Core/BinaryFunction.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1626,6 +1626,8 @@ bool BinaryFunction::scanExternalRefs() {
16261626
if (!BC.HasRelocations)
16271627
continue;
16281628

1629+
BC.processInstructionForFuncReferences(Instruction);
1630+
16291631
if (BranchTargetSymbol) {
16301632
BC.MIB->replaceBranchTarget(Instruction, BranchTargetSymbol,
16311633
Emitter.LocalCtx.get());

bolt/lib/Passes/IdenticalCodeFolding.cpp

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,10 @@
1111
//===----------------------------------------------------------------------===//
1212

1313
#include "bolt/Passes/IdenticalCodeFolding.h"
14+
#include "bolt/Core/BinaryContext.h"
1415
#include "bolt/Core/HashUtilities.h"
1516
#include "bolt/Core/ParallelUtilities.h"
17+
#include "bolt/Rewrite/RewriteInstance.h"
1618
#include "llvm/ADT/SmallVector.h"
1719
#include "llvm/Support/CommandLine.h"
1820
#include "llvm/Support/ThreadPool.h"
@@ -31,6 +33,7 @@ using namespace bolt;
3133
namespace opts {
3234

3335
extern cl::OptionCategory BoltOptCategory;
36+
extern cl::opt<unsigned> Verbosity;
3437

3538
static cl::opt<bool>
3639
ICFUseDFS("icf-dfs", cl::desc("use DFS ordering when using -icf option"),
@@ -341,6 +344,75 @@ typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
341344
namespace llvm {
342345
namespace bolt {
343346

347+
void IdenticalCodeFolding::processDataRelocations(
348+
BinaryContext &BC, const SectionRef &SecRefRelData) {
349+
for (const RelocationRef &Rel : SecRefRelData.relocations()) {
350+
symbol_iterator SymbolIter = Rel.getSymbol();
351+
const ObjectFile *OwningObj = Rel.getObject();
352+
assert(SymbolIter != OwningObj->symbol_end() &&
353+
"relocation Symbol expected");
354+
const SymbolRef &Symbol = *SymbolIter;
355+
const uint64_t SymbolAddress = cantFail(Symbol.getAddress());
356+
const ELFObjectFileBase *ELFObj = dyn_cast<ELFObjectFileBase>(OwningObj);
357+
if (!ELFObj)
358+
assert(false && "Only ELFObjectFileBase is supported");
359+
const int64_t Addend = BinaryContext::getRelocationAddend(ELFObj, Rel);
360+
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(SymbolAddress + Addend);
361+
if (!BF)
362+
continue;
363+
BF->setUnsetToICF();
364+
}
365+
}
366+
367+
Error IdenticalCodeFolding::createFoldSkipList(BinaryContext &BC) {
368+
Error ErrorStatus = Error::success();
369+
ErrorOr<BinarySection &> SecRelData = BC.getUniqueSectionByName(".rela.data");
370+
if (!BC.HasRelocations)
371+
ErrorStatus = joinErrors(
372+
std::move(ErrorStatus),
373+
createFatalBOLTError(Twine("BOLT-ERROR: Binary built without "
374+
"relocations. Safe ICF is not supported")));
375+
if (ErrorStatus)
376+
return ErrorStatus;
377+
if (SecRelData) {
378+
SectionRef SecRefRelData = SecRelData->getSectionRef();
379+
processDataRelocations(BC, SecRefRelData);
380+
}
381+
382+
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
383+
if (BF.getState() == BinaryFunction::State::CFG) {
384+
for (const BinaryBasicBlock *BB : BF.getLayout().blocks())
385+
for (const MCInst &Inst : *BB)
386+
BC.processInstructionForFuncReferences(Inst);
387+
}
388+
};
389+
ParallelUtilities::PredicateTy SkipFunc =
390+
[&](const BinaryFunction &BF) -> bool { return (bool)ErrorStatus; };
391+
ParallelUtilities::runOnEachFunction(
392+
BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
393+
"markUnsafe", /*ForceSequential*/ false, 2);
394+
395+
LLVM_DEBUG({
396+
std::vector<StringRef> Vect;
397+
std::mutex PrintMutex;
398+
ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
399+
if (BF.isSafeToICF())
400+
return;
401+
std::lock_guard<std::mutex> Lock(PrintMutex);
402+
Vect.push_back(BF.getOneName());
403+
};
404+
ParallelUtilities::PredicateTy SkipFunc =
405+
[&](const BinaryFunction &BF) -> bool { return false; };
406+
ParallelUtilities::runOnEachFunction(
407+
BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
408+
"markUnsafe", /*ForceSequential*/ false, 2);
409+
llvm::sort(Vect);
410+
for (const auto &FuncName : Vect)
411+
dbgs() << "BOLT-DEBUG: skipping function " << FuncName << '\n';
412+
});
413+
return ErrorStatus;
414+
}
415+
344416
Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
345417
const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
346418
uint64_t NumFunctionsFolded = 0;
@@ -350,6 +422,9 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
350422
std::atomic<uint64_t> NumFoldedLastIteration{0};
351423
CongruentBucketsMap CongruentBuckets;
352424

425+
auto SkipFuncShared = [&](const BinaryFunction &BF) {
426+
return !shouldOptimize(BF) || !BF.isSafeToICF();
427+
};
353428
// Hash all the functions
354429
auto hashFunctions = [&]() {
355430
NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
@@ -369,7 +444,7 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
369444
};
370445

371446
ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
372-
return !shouldOptimize(BF);
447+
return SkipFuncShared(BF);
373448
};
374449

375450
ParallelUtilities::runOnEachFunction(
@@ -385,7 +460,7 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
385460
"ICF breakdown", opts::TimeICF);
386461
for (auto &BFI : BC.getBinaryFunctions()) {
387462
BinaryFunction &BF = BFI.second;
388-
if (!this->shouldOptimize(BF))
463+
if (SkipFuncShared(BF))
389464
continue;
390465
CongruentBuckets[&BF].emplace(&BF);
391466
}
@@ -475,7 +550,11 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
475550

476551
LLVM_DEBUG(SinglePass.stopTimer());
477552
};
478-
553+
if (IsSafeICF) {
554+
if (Error Err = createFoldSkipList(BC)) {
555+
return Err;
556+
}
557+
}
479558
hashFunctions();
480559
createCongruentBuckets();
481560

bolt/lib/Rewrite/BinaryPassManager.cpp

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,8 @@ extern cl::opt<bool> PrintDynoStats;
5454
extern cl::opt<bool> DumpDotAll;
5555
extern cl::opt<std::string> AsmDump;
5656
extern cl::opt<bolt::PLTCall::OptType> PLT;
57+
extern cl::opt<bool> ICF;
58+
extern cl::opt<bool> SafeICF;
5759

5860
static cl::opt<bool>
5961
DynoStatsAll("dyno-stats-all",
@@ -65,9 +67,6 @@ static cl::opt<bool>
6567
cl::desc("eliminate unreachable code"), cl::init(true),
6668
cl::cat(BoltOptCategory));
6769

68-
cl::opt<bool> ICF("icf", cl::desc("fold functions with identical code"),
69-
cl::cat(BoltOptCategory));
70-
7170
static cl::opt<bool> JTFootprintReductionFlag(
7271
"jt-footprint-reduction",
7372
cl::desc("make jump tables size smaller at the cost of using more "
@@ -397,8 +396,9 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
397396
Manager.registerPass(std::make_unique<StripRepRet>(NeverPrint),
398397
opts::StripRepRet);
399398

400-
Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
401-
opts::ICF);
399+
Manager.registerPass(
400+
std::make_unique<IdenticalCodeFolding>(PrintICF, opts::SafeICF),
401+
opts::ICF || opts::SafeICF);
402402

403403
Manager.registerPass(
404404
std::make_unique<SpecializeMemcpy1>(NeverPrint, opts::SpecializeMemcpy1),
@@ -422,8 +422,9 @@ Error BinaryFunctionPassManager::runAllPasses(BinaryContext &BC) {
422422

423423
Manager.registerPass(std::make_unique<Inliner>(PrintInline));
424424

425-
Manager.registerPass(std::make_unique<IdenticalCodeFolding>(PrintICF),
426-
opts::ICF);
425+
Manager.registerPass(
426+
std::make_unique<IdenticalCodeFolding>(PrintICF, opts::SafeICF),
427+
opts::ICF || opts::SafeICF);
427428

428429
Manager.registerPass(std::make_unique<PLTCall>(PrintPLT));
429430

bolt/lib/Rewrite/BoltDiff.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ namespace opts {
2929
extern cl::OptionCategory BoltDiffCategory;
3030
extern cl::opt<bool> NeverPrint;
3131
extern cl::opt<bool> ICF;
32+
extern cl::opt<bool> SafeICF;
3233

3334
static cl::opt<bool> IgnoreLTOSuffix(
3435
"ignore-lto-suffix",
@@ -698,7 +699,7 @@ void RewriteInstance::compare(RewriteInstance &RI2) {
698699

699700
// Pre-pass ICF
700701
if (opts::ICF) {
701-
IdenticalCodeFolding ICF(opts::NeverPrint);
702+
IdenticalCodeFolding ICF(opts::NeverPrint, opts::SafeICF);
702703
outs() << "BOLT-DIFF: Starting ICF pass for binary 1";
703704
BC->logBOLTErrorsAndQuitOnFatal(ICF.runOnFunctions(*BC));
704705
outs() << "BOLT-DIFF: Starting ICF pass for binary 2";

0 commit comments

Comments
 (0)