Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions bolt/include/bolt/Core/MCPlusBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -1536,6 +1536,14 @@ class MCPlusBuilder {
llvm_unreachable("not implemented");
}

/// Match function \p BF to a long veneer for absolute code. Return true if
/// the match was successful and populate \p TargetAddress with an address of
/// the function veneer jumps to.
virtual bool matchAbsLongVeneer(const BinaryFunction &BF,
uint64_t &TargetAddress) const {
llvm_unreachable("not implemented");
}

virtual bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const {
llvm_unreachable("not implemented");
return false;
Expand Down
48 changes: 32 additions & 16 deletions bolt/lib/Passes/VeneerElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,30 +29,47 @@ static llvm::cl::opt<bool>
namespace llvm {
namespace bolt {

static bool isPossibleVeneer(const BinaryFunction &BF) {
return BF.isAArch64Veneer() || BF.getOneName().starts_with("__AArch64");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to also check that this is not PI? eg:

BF.isAArch64Veneer() || 
  (BF.getOneName().starts_with("__AArch64") && BF.getBinaryContext().HasFixedLoadAddress);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can possibly add a check later when we detect an absolute veneer, but then it should be legal to call an absolute-address function even from PIC, right?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@smithp35 made a similar suggestion with mine here (2nd paragraph).

But it looks like there is a more complex scenario: a hypothetical non-PI binary that has mixed PIC and non-PIC segments?

Not sure if my understanding is correct.
In theory at least, it should be possible/legal to do such a call.
But if we have such a veneer in an otherwise PIC segment, wouldn't that make the entire segment a non-PIC? (at least in theory).


Regarding BOLT itself, TMU it assumes that a binary is PIC if the ELF file is not ET_EXEC; it does not keep such information per segment.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think lld is the only tool putting a prefix of __AArch64 onto the symbol, if other tools start doing that too then LLD also has a Thunk later on in the name, although scanning for that is not likely profitable.

The AbsLongThunks must only be generated in the absence of PIC or PIE so if that can be detected by Bolt then it could be used to rule these out. Whether it is worth doing so or not I don't know.

}

Error VeneerElimination::runOnFunctions(BinaryContext &BC) {
if (!opts::EliminateVeneers || !BC.isAArch64())
return Error::success();

std::map<uint64_t, BinaryFunction> &BFs = BC.getBinaryFunctions();
std::unordered_map<const MCSymbol *, const MCSymbol *> VeneerDestinations;
uint64_t VeneersCount = 0;
for (auto &It : BFs) {
BinaryFunction &VeneerFunction = It.second;
if (!VeneerFunction.isAArch64Veneer())
uint64_t NumEliminatedVeneers = 0;
for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
if (!isPossibleVeneer(BF))
continue;

VeneersCount++;
VeneerFunction.setPseudo(true);
MCInst &FirstInstruction = *(VeneerFunction.begin()->begin());
const MCSymbol *VeneerTargetSymbol =
BC.MIB->getTargetSymbol(FirstInstruction, 1);
assert(VeneerTargetSymbol && "Expecting target symbol for instruction");
for (const MCSymbol *Symbol : VeneerFunction.getSymbols())
if (BF.isIgnored())
continue;

const MCSymbol *VeneerTargetSymbol = 0;
uint64_t TargetAddress;
if (BC.MIB->matchAbsLongVeneer(BF, TargetAddress)) {
if (BinaryFunction *TargetBF =
BC.getBinaryFunctionAtAddress(TargetAddress))
VeneerTargetSymbol = TargetBF->getSymbol();
} else {
MCInst &FirstInstruction = *(BF.begin()->begin());
if (BC.MIB->hasAnnotation(FirstInstruction, "AArch64Veneer"))
VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction, 1);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could keep the assertion just for the else case?

assert(VeneerTargetSymbol && "Expecting target symbol for instruction");

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I expect we will get more veneers which I'll add the support for in later PRs.

}

if (!VeneerTargetSymbol)
continue;

for (const MCSymbol *Symbol : BF.getSymbols())
VeneerDestinations[Symbol] = VeneerTargetSymbol;

NumEliminatedVeneers++;
BF.setPseudo(true);
}

BC.outs() << "BOLT-INFO: number of removed linker-inserted veneers: "
<< VeneersCount << "\n";
<< NumEliminatedVeneers << '\n';
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not directly related, but was there a particular reason the other statistic at L102 was a debug message?

In case the output is helpful to understand whether the number of eliminated veneers does match or not the number of patched call-sites, we could maybe combine them and emit them both at L102?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know if there was a reason for that separation, but indeed we can combine the messages in a new PR.


// Handle veneers to veneers in case they occur
for (auto &Entry : VeneerDestinations) {
Expand All @@ -65,9 +82,8 @@ Error VeneerElimination::runOnFunctions(BinaryContext &BC) {
}

uint64_t VeneerCallers = 0;
for (auto &It : BFs) {
BinaryFunction &Function = It.second;
for (BinaryBasicBlock &BB : Function) {
for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
for (BinaryBasicBlock &BB : BF) {
for (MCInst &Instr : BB) {
if (!BC.MIB->isCall(Instr) || BC.MIB->isIndirectCall(Instr))
continue;
Expand Down
64 changes: 64 additions & 0 deletions bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,16 @@
#include "MCTargetDesc/AArch64MCExpr.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "Utils/AArch64BaseInfo.h"
#include "bolt/Core/BinaryBasicBlock.h"
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Core/MCPlusBuilder.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

Expand Down Expand Up @@ -1320,6 +1323,67 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
return 3;
}

/// Match the following pattern:
///
/// ADR x16, .L1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this was meant to be LDR?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching it.

/// BR x16
/// L1:
/// .quad Target
///
/// Populate \p TargetAddress with the Target value on successful match.
bool matchAbsLongVeneer(const BinaryFunction &BF,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LLD will attempt to write veneers as "short" veneers whenever possible. This is true for all AArch64 LLD generated veneers. If the distance from the veneer to the target is < 128 MiB then LLD will use
B target instead of the sequence above. LLD won't change the name of the veneer when it does that.

I'd expect "short" veneers to be common for programs that are between 128 MiB and 256 MiB in size.

Maybe a possible enhancement to detect this case.

uint64_t &TargetAddress) const override {
if (BF.size() != 1 || BF.getMaxSize() < 16)
return false;

if (!BF.hasConstantIsland())
return false;

const BinaryBasicBlock &BB = BF.front();
if (BB.size() != 2)
return false;

const MCInst &LDRInst = BB.getInstructionAtIndex(0);
if (LDRInst.getOpcode() != AArch64::LDRXl)
return false;

if (!LDRInst.getOperand(0).isReg() ||
LDRInst.getOperand(0).getReg() != AArch64::X16)
return false;

const MCSymbol *TargetSym = getTargetSymbol(LDRInst, 1);
if (!TargetSym)
return false;

const MCInst &BRInst = BB.getInstructionAtIndex(1);
if (BRInst.getOpcode() != AArch64::BR)
return false;
if (!BRInst.getOperand(0).isReg() ||
BRInst.getOperand(0).getReg() != AArch64::X16)
return false;

const BinaryFunction::IslandInfo &IInfo = BF.getIslandInfo();
if (IInfo.HasDynamicRelocations)
return false;

auto Iter = IInfo.Offsets.find(8);
if (Iter == IInfo.Offsets.end() || Iter->second != TargetSym)
return false;

// Extract the absolute value stored inside the island.
StringRef SectionContents = BF.getOriginSection()->getContents();
StringRef FunctionContents = SectionContents.substr(
BF.getAddress() - BF.getOriginSection()->getAddress(), BF.getMaxSize());

const BinaryContext &BC = BF.getBinaryContext();
DataExtractor DE(FunctionContents, BC.AsmInfo->isLittleEndian(),
BC.AsmInfo->getCodePointerSize());
uint64_t Offset = 8;
TargetAddress = DE.getAddress(&Offset);

return true;
}

bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const override {
if (!isADRP(Adrp) || !isAddXri(Add))
return false;
Expand Down
51 changes: 51 additions & 0 deletions bolt/test/AArch64/veneer-lld-abs.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
## Check that llvm-bolt correctly recognizes long absolute thunks generated
## by LLD.

# RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o
# RUN: %clang %cflags -fno-PIC -no-pie %t.o -o %t.exe -nostdlib \
# RUN: -fuse-ld=lld -Wl,-q
# RUN: llvm-objdump -d %t.exe | FileCheck --check-prefix=CHECK-INPUT %s
# RUN: llvm-objcopy --remove-section .rela.mytext %t.exe
# RUN: llvm-bolt %t.exe -o %t.bolt --elim-link-veneers=true --lite=0
# RUN: llvm-objdump -d -j .text %t.bolt | \
# RUN: FileCheck --check-prefix=CHECK-OUTPUT %s

.text
.balign 4
.global foo
.type foo, %function
foo:
adrp x1, foo
ret
.size foo, .-foo

.section ".mytext", "ax"
.balign 4

.global __AArch64AbsLongThunk_foo
.type __AArch64AbsLongThunk_foo, %function
__AArch64AbsLongThunk_foo:
ldr x16, .L1
br x16
# CHECK-INPUT-LABEL: <__AArch64AbsLongThunk_foo>:
# CHECK-INPUT-NEXT: ldr
# CHECK-INPUT-NEXT: br
.L1:
.quad foo
.size __AArch64AbsLongThunk_foo, .-__AArch64AbsLongThunk_foo

## Check that the thunk was removed from .text and _start() calls foo()
## directly.

# CHECK-OUTPUT-NOT: __AArch64AbsLongThunk_foo

.global _start
.type _start, %function
_start:
# CHECK-INPUT-LABEL: <_start>:
# CHECK-OUTPUT-LABEL: <_start>:
bl __AArch64AbsLongThunk_foo
# CHECK-INPUT-NEXT: bl {{.*}} <__AArch64AbsLongThunk_foo>
# CHECK-OUTPUT-NEXT: bl {{.*}} <foo>
ret
.size _start, .-_start
Loading