Skip to content
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
03ca42c
safe-icf
Nov 12, 2024
237c02e
Changed icf option
Nov 15, 2024
d794fb0
addressed comments
Nov 15, 2024
6e853ca
Added four tests for pic/no-pic mode. Tests for global const function…
Nov 16, 2024
e36ec02
switched to enum version of icf compile option
Nov 16, 2024
b83c335
simplified debug print, removed shared func
Nov 20, 2024
39c7902
updated tests
Nov 20, 2024
39586b3
addressed comments
Nov 20, 2024
debeb6f
updated icf option descriptions
Nov 20, 2024
429075e
moved getRelocation* to Relocs.cpp
Nov 20, 2024
29cc466
moved code around
Nov 20, 2024
bf3bf2b
simplified assembly
Nov 22, 2024
e867900
changed comment
Nov 22, 2024
ab51077
consolidate main/helper assembly under main. Next step move to test t…
Nov 25, 2024
8fe4b3c
move assembly into test
Nov 26, 2024
dc41c58
removed some lines from assembly
Nov 26, 2024
6287a7f
removed comments, changed demangled names for key functions
Nov 26, 2024
c6a1965
removed .text and .file
Nov 26, 2024
d393b5e
Changed schedule strategy, cleaned up tests some more, and removed so…
Nov 30, 2024
4ace9bd
moved reloc checking code, and changed elf object from assert to retu…
Nov 30, 2024
09bc3fa
fixing formatting that was messed up due to VSC format on save gettin…
Nov 30, 2024
3912b85
some cleanup after code changes to address comments
Dec 1, 2024
769cf24
clang-format
Dec 1, 2024
d4f81c9
removed some redundant tests, re-added on more targetted one that jus…
Dec 4, 2024
ec54f24
removed one more test I missed
Dec 4, 2024
13bcc22
Changed so that general data sections are processed. Moved ICF option…
Dec 5, 2024
5110f74
Added vtable filtering, and check fo X86 architecture
Dec 6, 2024
d524ed5
moved check as first one
Dec 6, 2024
77ae0a2
fix formatter
Dec 6, 2024
d03ff1d
minor nits
Dec 10, 2024
38758ee
Removed skip and moved processInstructionForFuncReferences, nits
Dec 10, 2024
bfb1dc6
refactor bit vector
Dec 11, 2024
417196c
nit
Dec 11, 2024
bbf015f
changed to using BinarySections, cleaned up code that is no longer ne…
Dec 13, 2024
1ef1d27
clang-format
Dec 13, 2024
f1813b3
more cleanup
Dec 13, 2024
eb882e5
addressed comments
Dec 13, 2024
f6dcb90
added comment for static relocs
Dec 13, 2024
0070ceb
Added deprecation warning, switched to SparseBitVector
Dec 14, 2024
0f96524
changed comments
Dec 16, 2024
a9cd467
more cleanup
Dec 16, 2024
6a89d5c
addressed comments
Dec 17, 2024
645aac2
small change to comment
Dec 17, 2024
23ea76b
changed comments for HasAddressTaken
Dec 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion bolt/docs/CommandLineArgumentReference.md
Original file line number Diff line number Diff line change
Expand Up @@ -498,9 +498,12 @@
Automatically put hot code on 2MB page(s) (hugify) at runtime. No manual call
to hugify is needed in the binary (which is what --hot-text relies on).

- `--icf`
- `--icf=<value>`

Fold functions with identical code
- `all`: Enable identical code folding
- `none`: Disable identical code folding (default)
- `safe`: Enable safe identical code folding

- `--icp`

Expand Down
13 changes: 13 additions & 0 deletions bolt/include/bolt/Core/BinaryFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,9 @@ class BinaryFunction {
/// Function order for streaming into the destination binary.
uint32_t Index{-1U};

/// Indicates function is referenced by non-control flow instruction.
bool HasAddressTaken{false};

/// Get basic block index assuming it belongs to this function.
unsigned getIndex(const BinaryBasicBlock *BB) const {
assert(BB->getIndex() < BasicBlocks.size());
Expand Down Expand Up @@ -817,6 +820,12 @@ class BinaryFunction {
return nullptr;
}

/// Return true if function is referenced in non-control flow instructions..
bool hasAddressTaken() const { return HasAddressTaken; }

/// Set whether function is referenced in non-control flow instructions.
void setHasAddressTaken(bool Hat) { HasAddressTaken = Hat; }

/// Returns the raw binary encoding of this function.
ErrorOr<ArrayRef<uint8_t>> getData() const;

Expand Down Expand Up @@ -2094,6 +2103,10 @@ class BinaryFunction {
// adjustments.
void handleAArch64IndirectCall(MCInst &Instruction, const uint64_t Offset);

/// Processes code section to identify function references.
void processInstructionForFuncReferences(BinaryContext &BC,
const MCInst &Inst);

/// Scan function for references to other functions. In relocation mode,
/// add relocations for external references. In non-relocation mode, detect
/// and mark new entry points.
Expand Down
7 changes: 7 additions & 0 deletions bolt/include/bolt/Core/Relocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/TargetParser/Triple.h"

namespace llvm {
Expand Down Expand Up @@ -178,6 +180,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Relocation &Rel) {
return OS;
}

uint32_t getRelocationSymbol(const object::ELFObjectFileBase *Obj,
const object::RelocationRef &Rel);

int64_t getRelocationAddend(const object::ELFObjectFileBase *Obj,
const object::RelocationRef &Rel);
} // namespace bolt
} // namespace llvm

Expand Down
29 changes: 28 additions & 1 deletion bolt/include/bolt/Passes/IdenticalCodeFolding.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,42 @@ class IdenticalCodeFolding : public BinaryFunctionPass {
return false;
if (BF.hasSDTMarker())
return false;
if (BF.hasAddressTaken())
return false;
return BinaryFunctionPass::shouldOptimize(BF);
}

public:
enum class ICFLevel {
None, // No ICF. (Default)
Safe, // Safe ICF for all sections.
All, // Aggressive ICF for code.
};
explicit IdenticalCodeFolding(const cl::opt<bool> &PrintPass)
: BinaryFunctionPass(PrintPass) {}
: BinaryFunctionPass(PrintPass) {
VtableBitVector.resize((((uint64_t)1) << 32) / 8);
}

const char *getName() const override { return "identical-code-folding"; }
Error runOnFunctions(BinaryContext &BC) override;

private:
/// Bit vector of memory addresses of vtables.
llvm::BitVector VtableBitVector;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to use SparseBitVector?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding random access is O(n) for it?
Considering how much debug information processing takes, seems fine?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, random access is O(N), but sequential is not:

testing/setting bits in a SparseBitVector is O(distance away from last set bit).

Vtables should occupy a vanishingly small part of the binary, so sparse shouldn't be too inefficient. You can also drop an explicit size. Can you check the two and compare peak RSS (disabling debug info update) and pass wall time?

Or let @maksfb weigh in.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BitVector
12:10.63 real, 2190.50 user, 3352.54 sys, 0 amem, 50543836 mmem
12:15.47 real, 2171.61 user, 3110.00 sys, 0 amem, 50486944 mmem
12:15.42 real, 2199.27 user, 3336.12 sys, 0 amem, 50550316 mmem
SparceBItVector
11:56.70 real, 2173.22 user, 3025.61 sys, 0 amem, 50546564 mmem
12:25.82 real, 2196.06 user, 3279.76 sys, 0 amem, 50561464 mmem
11:48.03 real, 2254.86 user, 3216.55 sys, 0 amem, 50547536 mmem

Seems slightly faster with SparseBitVector. 🤷

bool isInVTable(uint64_t Address) const {
return VtableBitVector.test(Address / 8);
}
/// Scans symbol table and creates a bit vector of memory addresses of
/// vtables.
void processSymbolTable(const BinaryContext &BC);
/// Analyze .text section and relocations and mark functions that are not
/// safe to fold.
Error markFunctionsUnsafeToFold(BinaryContext &BC);
/// Process relocations in the .data section to identify function
/// references.
Error processDataRelocations(BinaryContext &BC,
const SectionRef &SecRefRelData,
const bool HasAddressTaken);
};

} // namespace bolt
Expand Down
16 changes: 16 additions & 0 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1513,6 +1513,20 @@ MCSymbol *BinaryFunction::registerBranch(uint64_t Src, uint64_t Dst) {
return Target;
}

void BinaryFunction::processInstructionForFuncReferences(BinaryContext &BC,
const MCInst &Inst) {
for (const MCOperand &Op : MCPlus::primeOperands(Inst)) {
if (!Op.isExpr())
continue;
const MCExpr &Expr = *Op.getExpr();
if (Expr.getKind() == MCExpr::SymbolRef) {
const MCSymbol &Symbol = cast<MCSymbolRefExpr>(Expr).getSymbol();
if (BinaryFunction *BF = BC.getFunctionForSymbol(&Symbol))
BF->setHasAddressTaken(true);
}
}
}

bool BinaryFunction::scanExternalRefs() {
bool Success = true;
bool DisassemblyFailed = false;
Expand Down Expand Up @@ -1633,6 +1647,8 @@ bool BinaryFunction::scanExternalRefs() {
[](const MCOperand &Op) { return Op.isExpr(); })) {
// Skip assembly if the instruction may not have any symbolic operands.
continue;
} else {
processInstructionForFuncReferences(BC, Instruction);
}

// Emit the instruction using temp emitter and generate relocations.
Expand Down
62 changes: 62 additions & 0 deletions bolt/lib/Core/Relocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1115,3 +1115,65 @@ void Relocation::print(raw_ostream &OS) const {
OS << ", 0x" << Twine::utohexstr(Addend);
OS << ", 0x" << Twine::utohexstr(Value);
}

namespace {
template <typename ELFT>
int64_t getRelocationAddend(const llvm::object::ELFObjectFile<ELFT> *Obj,
const llvm::object::RelocationRef &RelRef) {
using ELFShdrTy = typename ELFT::Shdr;
using Elf_Rela = typename ELFT::Rela;
int64_t Addend = 0;
const llvm::object::ELFFile<ELFT> &EF = Obj->getELFFile();
llvm::object::DataRefImpl Rel = RelRef.getRawDataRefImpl();
const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
switch (RelocationSection->sh_type) {
default:
llvm_unreachable("unexpected relocation section type");
case ELF::SHT_REL:
break;
case ELF::SHT_RELA: {
const Elf_Rela *RelA = Obj->getRela(Rel);
Addend = RelA->r_addend;
break;
}
}

return Addend;
}

template <typename ELFT>
uint32_t getRelocationSymbol(const llvm::object::ELFObjectFile<ELFT> *Obj,
const llvm::object::RelocationRef &RelRef) {
using ELFShdrTy = typename ELFT::Shdr;
uint32_t Symbol = 0;
const llvm::object::ELFFile<ELFT> &EF = Obj->getELFFile();
llvm::object::DataRefImpl Rel = RelRef.getRawDataRefImpl();
const ELFShdrTy *RelocationSection = cantFail(EF.getSection(Rel.d.a));
switch (RelocationSection->sh_type) {
default:
llvm_unreachable("unexpected relocation section type");
case ELF::SHT_REL:
Symbol = Obj->getRel(Rel)->getSymbol(EF.isMips64EL());
break;
case ELF::SHT_RELA:
Symbol = Obj->getRela(Rel)->getSymbol(EF.isMips64EL());
break;
}

return Symbol;
}
} // namespace

namespace llvm {
namespace bolt {
uint32_t getRelocationSymbol(const llvm::object::ELFObjectFileBase *Obj,
const llvm::object::RelocationRef &Rel) {
return ::getRelocationSymbol(cast<llvm::object::ELF64LEObjectFile>(Obj), Rel);
}

int64_t getRelocationAddend(const llvm::object::ELFObjectFileBase *Obj,
const llvm::object::RelocationRef &Rel) {
return ::getRelocationAddend(cast<llvm::object::ELF64LEObjectFile>(Obj), Rel);
}
} // namespace bolt
} // namespace llvm
127 changes: 125 additions & 2 deletions bolt/lib/Passes/IdenticalCodeFolding.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "bolt/Passes/IdenticalCodeFolding.h"
#include "bolt/Core/HashUtilities.h"
#include "bolt/Core/ParallelUtilities.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ThreadPool.h"
Expand Down Expand Up @@ -42,6 +43,19 @@ TimeICF("time-icf",
cl::ReallyHidden,
cl::ZeroOrMore,
cl::cat(BoltOptCategory));

cl::opt<bolt::IdenticalCodeFolding::ICFLevel> ICF(
"icf", cl::desc("fold functions with identical code"),
cl::init(bolt::IdenticalCodeFolding::ICFLevel::None),
cl::values(clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "all",
"Enable identical code folding"),
clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::All, "",
"Enable identical code folding"),
clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::None, "none",
"Disable identical code folding (default)"),
clEnumValN(bolt::IdenticalCodeFolding::ICFLevel::Safe, "safe",
"Enable safe identical code folding")),
cl::ZeroOrMore, cl::ValueOptional, cl::cat(BoltOptCategory));
} // namespace opts

/// Compare two jump tables in 2 functions. The function relies on consistent
Expand Down Expand Up @@ -340,6 +354,113 @@ typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,

namespace llvm {
namespace bolt {
/// Scans symbol table and creates a bit vector of memory addresses of vtables.
void IdenticalCodeFolding::processSymbolTable(const BinaryContext &BC) {
for (auto &[Address, Data] : BC.getBinaryData()) {
// Filter out all symbols that are not vtables.
if (!Data->getName().starts_with("_ZTV"))
continue;
for (uint64_t I = Address / 8, End = I + (Data->getSize() / 8); I < End;
++I)
VtableBitVector.set(I);
}
}
Error IdenticalCodeFolding::processDataRelocations(
BinaryContext &BC, const SectionRef &SecRefRelData,
const bool HasAddressTaken) {
for (const RelocationRef &Rel : SecRefRelData.relocations()) {
if (isInVTable(Rel.getOffset()))
continue;
symbol_iterator SymbolIter = Rel.getSymbol();
const ObjectFile *OwningObj = Rel.getObject();
assert(SymbolIter != OwningObj->symbol_end() &&
"relocation Symbol expected");
const SymbolRef &Symbol = *SymbolIter;
const uint64_t SymbolAddress = cantFail(Symbol.getAddress());
const ELFObjectFileBase *ELFObj = dyn_cast<ELFObjectFileBase>(OwningObj);
if (!ELFObj)
return createFatalBOLTError(
Twine("BOLT-ERROR: Only ELFObjectFileBase is supported"));
const int64_t Addend = getRelocationAddend(ELFObj, Rel);
BinaryFunction *BF = BC.getBinaryFunctionAtAddress(SymbolAddress + Addend);
if (!BF)
continue;
BF->setHasAddressTaken(HasAddressTaken);
}
return Error::success();
}

Error IdenticalCodeFolding::markFunctionsUnsafeToFold(BinaryContext &BC) {
if (!BC.isX86())
BC.outs() << "BOLT-WARNING: safe ICF is only supported for x86\n";
processSymbolTable(BC);
for (const auto &Sec : BC.sections()) {
if (!Sec.hasSectionRef() || !Sec.isRela())
continue;
const SectionRef &SecRef = Sec.getSectionRef();
section_iterator RelocatedSecIter = cantFail(SecRef.getRelocatedSection());
assert(RelocatedSecIter != SecRef.getObject()->section_end() &&
"Relocation section exists without corresponding relocated section");
const SectionRef &RelocatedSecRef = *RelocatedSecIter;
const StringRef RelocatedSectionName = cantFail(RelocatedSecRef.getName());
const bool SkipRelocs =
StringSwitch<bool>(RelocatedSectionName)
.Cases(".plt", ".got.plt", ".eh_frame", ".gcc_except_table",
".fini_array", ".init_array", true)
.Default(false);
if (!RelocatedSecRef.isData() || SkipRelocs)
continue;

Error ErrorStatus =
processDataRelocations(BC, SecRef, /* HasAddressTaken */ true);
if (ErrorStatus)
return ErrorStatus;
}
ErrorOr<BinarySection &> SecRelDataIA =
BC.getUniqueSectionByName(".rela.init_array");
if (SecRelDataIA) {
const SectionRef SecRefRelData = SecRelDataIA->getSectionRef();
Error ErrorStatus =
processDataRelocations(BC, SecRefRelData, /* !HasAddressTaken */ false);
if (ErrorStatus)
return ErrorStatus;
}
ErrorOr<BinarySection &> SecRelDataFIA =
BC.getUniqueSectionByName(".rela.fini_array");
if (SecRelDataFIA) {
const SectionRef SecRefRelData = SecRelDataFIA->getSectionRef();
Error ErrorStatus =
processDataRelocations(BC, SecRefRelData, /* !HasAddressTaken */ false);
if (ErrorStatus)
return ErrorStatus;
}

ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
for (const BinaryBasicBlock *BB : BF.getLayout().blocks()) {
for (const MCInst &Inst : *BB) {
if (!(BC.MIB->isCall(Inst) || BC.MIB->isBranch(Inst)))
BF.processInstructionForFuncReferences(BC, Inst);
}
}
};
ParallelUtilities::PredicateTy SkipFunc =
[&](const BinaryFunction &BF) -> bool {
return BF.getState() != BinaryFunction::State::CFG;
};
ParallelUtilities::runOnEachFunction(
BC, ParallelUtilities::SchedulingPolicy::SP_INST_LINEAR, WorkFun,
SkipFunc, "markUnsafe", /*ForceSequential*/ false, 2);

LLVM_DEBUG({
for (auto &BFIter : BC.getBinaryFunctions()) {
if (!BFIter.second.hasAddressTaken())
continue;
dbgs() << "BOLT-DEBUG: skipping function " << BFIter.second.getOneName()
<< '\n';
}
});
return Error::success();
}

Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
Expand Down Expand Up @@ -385,7 +506,7 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
"ICF breakdown", opts::TimeICF);
for (auto &BFI : BC.getBinaryFunctions()) {
BinaryFunction &BF = BFI.second;
if (!this->shouldOptimize(BF))
if (!shouldOptimize(BF))
continue;
CongruentBuckets[&BF].emplace(&BF);
}
Expand Down Expand Up @@ -475,7 +596,9 @@ Error IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {

LLVM_DEBUG(SinglePass.stopTimer());
};

if (opts::ICF == ICFLevel::Safe)
if (Error Err = markFunctionsUnsafeToFold(BC))
return Err;
hashFunctions();
createCongruentBuckets();

Expand Down
Loading
Loading