-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[BOLT][AArch64] Add support for long absolute LLD thunks/veneers #113408
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,30 +29,47 @@ static llvm::cl::opt<bool> | |
| namespace llvm { | ||
| namespace bolt { | ||
|
|
||
| static bool isPossibleVeneer(const BinaryFunction &BF) { | ||
| return BF.isAArch64Veneer() || BF.getOneName().starts_with("__AArch64"); | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think lld is the only tool putting a prefix of The AbsLongThunks must only be generated in the absence of PIC or PIE so if that can be detected by Bolt then it could be used to rule these out. Whether it is worth doing so or not I don't know. |
||
| } | ||
|
|
||
| Error VeneerElimination::runOnFunctions(BinaryContext &BC) { | ||
| if (!opts::EliminateVeneers || !BC.isAArch64()) | ||
| return Error::success(); | ||
|
|
||
| std::map<uint64_t, BinaryFunction> &BFs = BC.getBinaryFunctions(); | ||
| std::unordered_map<const MCSymbol *, const MCSymbol *> VeneerDestinations; | ||
| uint64_t VeneersCount = 0; | ||
| for (auto &It : BFs) { | ||
| BinaryFunction &VeneerFunction = It.second; | ||
| if (!VeneerFunction.isAArch64Veneer()) | ||
| uint64_t NumEliminatedVeneers = 0; | ||
| for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { | ||
| if (!isPossibleVeneer(BF)) | ||
| continue; | ||
|
|
||
| VeneersCount++; | ||
| VeneerFunction.setPseudo(true); | ||
| MCInst &FirstInstruction = *(VeneerFunction.begin()->begin()); | ||
| const MCSymbol *VeneerTargetSymbol = | ||
| BC.MIB->getTargetSymbol(FirstInstruction, 1); | ||
| assert(VeneerTargetSymbol && "Expecting target symbol for instruction"); | ||
| for (const MCSymbol *Symbol : VeneerFunction.getSymbols()) | ||
| if (BF.isIgnored()) | ||
| continue; | ||
|
|
||
| const MCSymbol *VeneerTargetSymbol = 0; | ||
| uint64_t TargetAddress; | ||
| if (BC.MIB->matchAbsLongVeneer(BF, TargetAddress)) { | ||
| if (BinaryFunction *TargetBF = | ||
| BC.getBinaryFunctionAtAddress(TargetAddress)) | ||
| VeneerTargetSymbol = TargetBF->getSymbol(); | ||
| } else { | ||
| MCInst &FirstInstruction = *(BF.begin()->begin()); | ||
| if (BC.MIB->hasAnnotation(FirstInstruction, "AArch64Veneer")) | ||
| VeneerTargetSymbol = BC.MIB->getTargetSymbol(FirstInstruction, 1); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe we could keep the assertion just for the else case? assert(VeneerTargetSymbol && "Expecting target symbol for instruction");
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I expect we will get more veneers which I'll add the support for in later PRs. |
||
| } | ||
|
|
||
| if (!VeneerTargetSymbol) | ||
| continue; | ||
|
|
||
| for (const MCSymbol *Symbol : BF.getSymbols()) | ||
| VeneerDestinations[Symbol] = VeneerTargetSymbol; | ||
|
|
||
| NumEliminatedVeneers++; | ||
| BF.setPseudo(true); | ||
| } | ||
|
|
||
| BC.outs() << "BOLT-INFO: number of removed linker-inserted veneers: " | ||
| << VeneersCount << "\n"; | ||
| << NumEliminatedVeneers << '\n'; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not directly related, but was there a particular reason the other statistic at L102 was a debug message? In case the output is helpful to understand whether the number of eliminated veneers does match or not the number of patched call-sites, we could maybe combine them and emit them both at L102?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't know if there was a reason for that separation, but indeed we can combine the messages in a new PR. |
||
|
|
||
| // Handle veneers to veneers in case they occur | ||
| for (auto &Entry : VeneerDestinations) { | ||
|
|
@@ -65,9 +82,8 @@ Error VeneerElimination::runOnFunctions(BinaryContext &BC) { | |
| } | ||
|
|
||
| uint64_t VeneerCallers = 0; | ||
| for (auto &It : BFs) { | ||
| BinaryFunction &Function = It.second; | ||
| for (BinaryBasicBlock &BB : Function) { | ||
| for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) { | ||
| for (BinaryBasicBlock &BB : BF) { | ||
| for (MCInst &Instr : BB) { | ||
| if (!BC.MIB->isCall(Instr) || BC.MIB->isIndirectCall(Instr)) | ||
| continue; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -15,13 +15,16 @@ | |
| #include "MCTargetDesc/AArch64MCExpr.h" | ||
| #include "MCTargetDesc/AArch64MCTargetDesc.h" | ||
| #include "Utils/AArch64BaseInfo.h" | ||
| #include "bolt/Core/BinaryBasicBlock.h" | ||
| #include "bolt/Core/BinaryFunction.h" | ||
| #include "bolt/Core/MCPlusBuilder.h" | ||
| #include "llvm/BinaryFormat/ELF.h" | ||
| #include "llvm/MC/MCContext.h" | ||
| #include "llvm/MC/MCFixupKindInfo.h" | ||
| #include "llvm/MC/MCInstBuilder.h" | ||
| #include "llvm/MC/MCInstrInfo.h" | ||
| #include "llvm/MC/MCRegisterInfo.h" | ||
| #include "llvm/Support/DataExtractor.h" | ||
| #include "llvm/Support/Debug.h" | ||
| #include "llvm/Support/ErrorHandling.h" | ||
|
|
||
|
|
@@ -1320,6 +1323,67 @@ class AArch64MCPlusBuilder : public MCPlusBuilder { | |
| return 3; | ||
| } | ||
|
|
||
| /// Match the following pattern: | ||
| /// | ||
| /// ADR x16, .L1 | ||
|
||
| /// BR x16 | ||
| /// L1: | ||
| /// .quad Target | ||
| /// | ||
| /// Populate \p TargetAddress with the Target value on successful match. | ||
| bool matchAbsLongVeneer(const BinaryFunction &BF, | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. LLD will attempt to write veneers as "short" veneers whenever possible. This is true for all AArch64 LLD generated veneers. If the distance from the veneer to the target is < 128 MiB then LLD will use I'd expect "short" veneers to be common for programs that are between 128 MiB and 256 MiB in size. Maybe a possible enhancement to detect this case. |
||
| uint64_t &TargetAddress) const override { | ||
| if (BF.size() != 1 || BF.getMaxSize() < 16) | ||
| return false; | ||
|
|
||
| if (!BF.hasConstantIsland()) | ||
| return false; | ||
|
|
||
| const BinaryBasicBlock &BB = BF.front(); | ||
| if (BB.size() != 2) | ||
| return false; | ||
|
|
||
| const MCInst &LDRInst = BB.getInstructionAtIndex(0); | ||
| if (LDRInst.getOpcode() != AArch64::LDRXl) | ||
| return false; | ||
|
|
||
| if (!LDRInst.getOperand(0).isReg() || | ||
| LDRInst.getOperand(0).getReg() != AArch64::X16) | ||
| return false; | ||
|
|
||
| const MCSymbol *TargetSym = getTargetSymbol(LDRInst, 1); | ||
| if (!TargetSym) | ||
| return false; | ||
|
|
||
| const MCInst &BRInst = BB.getInstructionAtIndex(1); | ||
| if (BRInst.getOpcode() != AArch64::BR) | ||
| return false; | ||
| if (!BRInst.getOperand(0).isReg() || | ||
| BRInst.getOperand(0).getReg() != AArch64::X16) | ||
| return false; | ||
|
|
||
| const BinaryFunction::IslandInfo &IInfo = BF.getIslandInfo(); | ||
| if (IInfo.HasDynamicRelocations) | ||
| return false; | ||
|
|
||
| auto Iter = IInfo.Offsets.find(8); | ||
| if (Iter == IInfo.Offsets.end() || Iter->second != TargetSym) | ||
| return false; | ||
|
|
||
| // Extract the absolute value stored inside the island. | ||
| StringRef SectionContents = BF.getOriginSection()->getContents(); | ||
| StringRef FunctionContents = SectionContents.substr( | ||
| BF.getAddress() - BF.getOriginSection()->getAddress(), BF.getMaxSize()); | ||
|
|
||
| const BinaryContext &BC = BF.getBinaryContext(); | ||
| DataExtractor DE(FunctionContents, BC.AsmInfo->isLittleEndian(), | ||
| BC.AsmInfo->getCodePointerSize()); | ||
| uint64_t Offset = 8; | ||
| TargetAddress = DE.getAddress(&Offset); | ||
|
|
||
| return true; | ||
| } | ||
|
|
||
| bool matchAdrpAddPair(const MCInst &Adrp, const MCInst &Add) const override { | ||
| if (!isADRP(Adrp) || !isAddXri(Add)) | ||
| return false; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,51 @@ | ||
| ## Check that llvm-bolt correctly recognizes long absolute thunks generated | ||
| ## by LLD. | ||
|
|
||
| # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o | ||
| # RUN: %clang %cflags -fno-PIC -no-pie %t.o -o %t.exe -nostdlib \ | ||
| # RUN: -fuse-ld=lld -Wl,-q | ||
| # RUN: llvm-objdump -d %t.exe | FileCheck --check-prefix=CHECK-INPUT %s | ||
| # RUN: llvm-objcopy --remove-section .rela.mytext %t.exe | ||
| # RUN: llvm-bolt %t.exe -o %t.bolt --elim-link-veneers=true --lite=0 | ||
| # RUN: llvm-objdump -d -j .text %t.bolt | \ | ||
| # RUN: FileCheck --check-prefix=CHECK-OUTPUT %s | ||
|
|
||
| .text | ||
| .balign 4 | ||
| .global foo | ||
| .type foo, %function | ||
| foo: | ||
| adrp x1, foo | ||
| ret | ||
| .size foo, .-foo | ||
|
|
||
| .section ".mytext", "ax" | ||
| .balign 4 | ||
|
|
||
| .global __AArch64AbsLongThunk_foo | ||
| .type __AArch64AbsLongThunk_foo, %function | ||
| __AArch64AbsLongThunk_foo: | ||
| ldr x16, .L1 | ||
| br x16 | ||
| # CHECK-INPUT-LABEL: <__AArch64AbsLongThunk_foo>: | ||
| # CHECK-INPUT-NEXT: ldr | ||
| # CHECK-INPUT-NEXT: br | ||
| .L1: | ||
| .quad foo | ||
| .size __AArch64AbsLongThunk_foo, .-__AArch64AbsLongThunk_foo | ||
|
|
||
| ## Check that the thunk was removed from .text and _start() calls foo() | ||
| ## directly. | ||
|
|
||
| # CHECK-OUTPUT-NOT: __AArch64AbsLongThunk_foo | ||
|
|
||
| .global _start | ||
| .type _start, %function | ||
| _start: | ||
| # CHECK-INPUT-LABEL: <_start>: | ||
| # CHECK-OUTPUT-LABEL: <_start>: | ||
| bl __AArch64AbsLongThunk_foo | ||
| # CHECK-INPUT-NEXT: bl {{.*}} <__AArch64AbsLongThunk_foo> | ||
| # CHECK-OUTPUT-NEXT: bl {{.*}} <foo> | ||
| ret | ||
| .size _start, .-_start |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Would it make sense to also check that this is not PI? eg:
BF.isAArch64Veneer() || (BF.getOneName().starts_with("__AArch64") && BF.getBinaryContext().HasFixedLoadAddress);There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can possibly add a check later when we detect an absolute veneer, but then it should be legal to call an absolute-address function even from PIC, right?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@smithp35 made a similar suggestion with mine here (2nd paragraph).
But it looks like there is a more complex scenario: a hypothetical non-PI binary that has mixed PIC and non-PIC segments?
Not sure if my understanding is correct.
In theory at least, it should be possible/legal to do such a call.
But if we have such a veneer in an otherwise PIC segment, wouldn't that make the entire segment a non-PIC? (at least in theory).
Regarding BOLT itself, TMU it assumes that a binary is PIC if the ELF file is not
ET_EXEC; it does not keep such information per segment.