Skip to content

Commit 0306084

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.6-beta.1
1 parent f175030 commit 0306084

File tree

11 files changed

+335
-4
lines changed

11 files changed

+335
-4
lines changed

lld/ELF/Arch/AArch64.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "Symbols.h"
1212
#include "SyntheticSections.h"
1313
#include "Target.h"
14+
#include "TargetImpl.h"
1415
#include "lld/Common/ErrorHandler.h"
1516
#include "llvm/BinaryFormat/ELF.h"
1617
#include "llvm/Support/Endian.h"
@@ -83,6 +84,7 @@ class AArch64 : public TargetInfo {
8384
uint64_t val) const override;
8485
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
8586
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
87+
void applyBranchToBranchOpt() const override;
8688

8789
private:
8890
void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -975,6 +977,62 @@ void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
975977
}
976978
}
977979

980+
// Classify relocation `r` for the branch-to-branch optimization. If `r` is a
// control transfer relocation, return its effective addend, i.e. the addend
// that determines where control actually goes; otherwise return std::nullopt.
// A "control transfer relocation" usually means the target of a B or BL, but
// it also covers code references such as relative vtable entries.
//
// Only JUMP26, CALL26 and PLT32 are accepted:
//
// - PLT32: the value is assumed to be used for branching directly to the
//   symbol, and the addend only participates in producing the relocated
//   value, so the effective addend is always 0. If a PLT is needed, the
//   addend is added to the PLT's address, and branching into the middle of a
//   PLT makes no sense. Relative vtable relocations, for instance, use PLT32
//   with a zero or positive addend yet still branch to the symbol itself.
//
// - JUMP26 / CALL26: the only sensible reading of a non-zero addend is a
//   branch to symbol+addend, so the addend is the effective addend.
static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
                                                        Relocation &r) {
  switch (r.type) {
  case R_AARCH64_PLT32:
    return 0;
  case R_AARCH64_JUMP26:
  case R_AARCH64_CALL26:
    return r.addend;
  default:
    return std::nullopt;
  }
}
1004+
1005+
// Check whether `is` has an unconditional branch (a JUMP26 relocation) placed
// exactly at `offset`. On success return that relocation together with its
// effective addend; otherwise return {nullptr, 0}.
//
// The search is a binary search, so is.relocations must be sorted by offset
// (the relocation scanner sorts them when the optimization is enabled).
static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is,
                                                       uint64_t offset) {
  auto *const relEnd = is.relocations.end();
  // First relocation whose offset is not below the requested one.
  auto *rel = std::partition_point(
      is.relocations.begin(), relEnd,
      [offset](Relocation &cand) { return cand.offset < offset; });

  const bool isBranchAtOffset =
      rel != relEnd && rel->offset == offset && rel->type == R_AARCH64_JUMP26;
  if (!isBranchAtOffset)
    return {nullptr, 0};
  return {rel, rel->addend};
}
1016+
1017+
// Retarget control transfer relocation `r1` so that it goes straight to the
// destination of branch relocation `r2`: `r1` takes over r2's symbol and
// relocation expression, and its addend is adjusted to match.
static void mergeControlTransferRelocations(Relocation &r1,
                                            const Relocation &r2) {
  r1.sym = r2.sym;
  r1.expr = r2.expr;
  // For PLT32 the original addend shapes how the relocated value is
  // interpreted, so it is kept and r2's addend is added on top. For the other
  // relocation types the old addend was merely an offset into the original
  // target section and no longer means anything, so it is replaced.
  const bool keepOwnAddend = r1.type == R_AARCH64_PLT32;
  r1.addend = keepOwnAddend ? r1.addend + r2.addend : r2.addend;
}
1030+
1031+
// Run the generic branch-to-branch driver, plugging in the AArch64-specific
// callbacks defined in this file.
void AArch64::applyBranchToBranchOpt() const {
  applyBranchToBranchOptImpl(
      ctx, getBranchInfo, getControlTransferAddend,
      mergeControlTransferRelocations);
}
1035+
9781036
// AArch64 may use security features in variant PLT sequences. These are:
9791037
// Pointer Authentication (PAC), introduced in armv8.3-a and Branch Target
9801038
// Indicator (BTI) introduced in armv8.5-a. The additional instructions used

lld/ELF/Arch/TargetImpl.h

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
//===- TargetImpl.h ---------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLD_ELF_ARCH_TARGETIMPL_H
10+
#define LLD_ELF_ARCH_TARGETIMPL_H
11+
12+
#include "InputFiles.h"
13+
#include "InputSection.h"
14+
#include "Relocations.h"
15+
#include "Symbols.h"
16+
#include "llvm/BinaryFormat/ELF.h"
17+
18+
namespace lld {
19+
namespace elf {
20+
21+
// getControlTransferAddend: If this relocation is used for control transfer
22+
// instructions (e.g. branch, branch-link or call) or code references (e.g.
23+
// virtual function pointers) and indicates an address-insignificant reference,
24+
// return the effective addend for the relocation, otherwise return
25+
// std::nullopt. The effective addend for a relocation is the addend that is
26+
// used to determine its branch destination.
27+
//
28+
// getBranchInfo: If a control transfer relocation referring to is+offset
29+
// directly transfers control to a relocated branch instruction in the specified
30+
// section, return the relocation for the branch target as well as its effective
31+
// addend (see above). Otherwise return {nullptr, 0}.
32+
//
33+
// mergeControlTransferRelocations: Given r1, a relocation for which
34+
// getControlTransferAddend() returned a value, and r2, a relocation returned by
35+
// getBranchInfo(), modify r1 so that it branches directly to the target of r2.
36+
template <typename GetBranchInfo, typename GetControlTransferAddend,
37+
typename MergeControlTransferRelocations>
38+
inline void applyBranchToBranchOptImpl(
39+
Ctx &ctx, GetBranchInfo getBranchInfo,
40+
GetControlTransferAddend getControlTransferAddend,
41+
MergeControlTransferRelocations mergeControlTransferRelocations) {
42+
// Needs to run serially because it writes to the relocations array as well as
43+
// reading relocations of other sections.
44+
for (ELFFileBase *f : ctx.objectFiles) {
45+
auto getRelocBranchInfo =
46+
[&ctx, &getBranchInfo](Relocation &r,
47+
uint64_t addend) -> std::pair<Relocation *, uint64_t> {
48+
auto *target = dyn_cast_or_null<Defined>(r.sym);
49+
// We don't allow preemptible symbols (may go somewhere else),
50+
// absolute symbols (runtime behavior unknown), non-executable memory
51+
// (ditto) or non-regular sections (no section data).
52+
if (!target || target->isPreemptible || !target->section ||
53+
!(target->section->flags & llvm::ELF::SHF_EXECINSTR) ||
54+
target->section->kind() != SectionBase::Regular)
55+
return {nullptr, 0};
56+
return getBranchInfo(*cast<InputSection>(target->section),
57+
target->value + addend);
58+
};
59+
for (InputSectionBase *s : f->getSections()) {
60+
if (!s)
61+
continue;
62+
for (Relocation &r : s->relocations) {
63+
if (std::optional<uint64_t> addend =
64+
getControlTransferAddend(*cast<InputSection>(s),
65+
r)) {
66+
std::pair<Relocation *, uint64_t> targetAndAddend =
67+
getRelocBranchInfo(r, *addend);
68+
if (targetAndAddend.first) {
69+
while (1) {
70+
std::pair<Relocation *, uint64_t> nextTargetAndAddend =
71+
getRelocBranchInfo(*targetAndAddend.first, targetAndAddend.second);
72+
if (!nextTargetAndAddend.first)
73+
break;
74+
targetAndAddend = nextTargetAndAddend;
75+
}
76+
mergeControlTransferRelocations(r, *targetAndAddend.first);
77+
}
78+
}
79+
}
80+
}
81+
}
82+
}
83+
84+
} // namespace elf
85+
} // namespace lld
86+
87+
#endif

lld/ELF/Arch/X86_64.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "Symbols.h"
1212
#include "SyntheticSections.h"
1313
#include "Target.h"
14+
#include "TargetImpl.h"
1415
#include "lld/Common/ErrorHandler.h"
1516
#include "llvm/BinaryFormat/ELF.h"
1617
#include "llvm/Support/Endian.h"
@@ -50,6 +51,7 @@ class X86_64 : public TargetInfo {
5051
bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,
5152
InputSection *nextIS) const override;
5253
bool relaxOnce(int pass) const override;
54+
void applyBranchToBranchOpt() const override;
5355

5456
private:
5557
void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
@@ -1162,6 +1164,58 @@ void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
11621164
}
11631165
}
11641166

1167+
// Classify relocation `r` for the branch-to-branch optimization. If `r` is a
// control transfer relocation, return its effective addend; otherwise return
// std::nullopt. A "control transfer relocation" usually means the target of a
// CALL or JMP, but it also covers references such as relative vtable entries.
//
// Only PLT32 is accepted. A PLT32 value may be assumed to be used for
// branching directly to the symbol, with the addend only taking part in
// producing the relocated value, so the effective addend is always 0. The
// reason is that if a PLT is needed the addend is added to the PLT's address,
// and branching into the middle of a PLT makes no sense. Relative vtable
// relocations, for instance, use PLT32 with a zero or positive addend yet
// still branch to the symbol itself.
static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
                                                        Relocation &r) {
  if (r.type != R_X86_64_PLT32)
    return std::nullopt;
  return 0;
}
1185+
1186+
// Check whether the instruction at `offset` in `is` is a relocated
// `JMP rel32`. On success return the relocation on its displacement field
// together with the effective addend; otherwise return {nullptr, 0}.
//
// The relocation lookup is a binary search, so is.relocations must be sorted
// by offset (the relocation scanner sorts them when the optimization is
// enabled).
static std::pair<Relocation *, uint64_t> getBranchInfo(InputSection &is,
                                                       uint64_t offset) {
  auto bytes = is.contentMaybeDecompress();
  // 0xe9 is the opcode of JMP with a 32-bit immediate.
  if (!(bytes.size() > offset && bytes[offset] == 0xe9))
    return {nullptr, 0};

  // The displacement, and therefore the relocation, follows the opcode byte.
  const uint64_t dispOffset = offset + 1;
  auto *const relEnd = is.relocations.end();
  auto *rel = std::partition_point(
      is.relocations.begin(), relEnd,
      [dispOffset](Relocation &cand) { return cand.offset < dispOffset; });
  if (rel == relEnd || rel->offset != dispOffset)
    return {nullptr, 0};

  // Unlike getControlTransferAddend(), accepting PC32 is valid here: the
  // opcode check proves this is a real JMP rather than some other kind of
  // reference. The effective addend is the relocation addend plus 4.
  if (rel->type != R_X86_64_PC32 && rel->type != R_X86_64_PLT32)
    return {nullptr, 0};
  return {rel, rel->addend + 4};
}
1204+
1205+
// Retarget control transfer relocation `r1` so that it goes straight to the
// destination of branch relocation `r2`: `r1` takes over r2's symbol and
// relocation expression, and r2's effective addend is folded into its addend.
static void mergeControlTransferRelocations(Relocation &r1,
                                            const Relocation &r2) {
  r1.sym = r2.sym;
  r1.expr = r2.expr;
  // r2.addend is typically -4, or addend-4 for a PC32 branch to
  // symbol+addend; adding 4 cancels that bias before it is folded into r1.
  const auto branchAddend = r2.addend + 4;
  r1.addend += branchAddend;
}
1213+
1214+
// Run the generic branch-to-branch driver, plugging in the x86-64-specific
// callbacks defined in this file.
void X86_64::applyBranchToBranchOpt() const {
  applyBranchToBranchOptImpl(
      ctx, getBranchInfo, getControlTransferAddend,
      mergeControlTransferRelocations);
}
1218+
11651219
// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT
11661220
// entries containing endbr64 instructions. A PLT entry will be split into two
11671221
// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).

lld/ELF/Config.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ struct Config {
276276
bool bpFunctionOrderForCompression = false;
277277
bool bpDataOrderForCompression = false;
278278
bool bpVerboseSectionOrderer = false;
279+
bool branchToBranch = false;
279280
bool checkSections;
280281
bool checkDynamicRelocs;
281282
std::optional<llvm::DebugCompressionType> compressDebugSections;

lld/ELF/Driver.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,6 +1589,8 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
15891589
ctx.arg.zWxneeded = hasZOption(args, "wxneeded");
15901590
setUnresolvedSymbolPolicy(ctx, args);
15911591
ctx.arg.power10Stubs = args.getLastArgValue(OPT_power10_stubs_eq) != "no";
1592+
ctx.arg.branchToBranch = args.hasFlag(
1593+
OPT_branch_to_branch, OPT_no_branch_to_branch, ctx.arg.optimize >= 2);
15921594

15931595
if (opt::Arg *arg = args.getLastArg(OPT_eb, OPT_el)) {
15941596
if (arg->getOption().matches(OPT_eb))

lld/ELF/Options.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ def build_id: J<"build-id=">, HelpText<"Generate build ID note">,
5959
MetaVarName<"[fast,md5,sha1,uuid,0x<hexstring>]">;
6060
def : F<"build-id">, Alias<build_id>, AliasArgs<["sha1"]>, HelpText<"Alias for --build-id=sha1">;
6161

62+
defm branch_to_branch: B<"branch-to-branch",
63+
"Enable branch-to-branch optimization (default at -O2)",
64+
"Disable branch-to-branch optimization (default at -O0 and -O1)">;
65+
6266
defm check_sections: B<"check-sections",
6367
"Check section addresses for overlaps (default)",
6468
"Do not check section addresses for overlaps">;

lld/ELF/Relocations.cpp

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1671,9 +1671,10 @@ void RelocationScanner::scan(Relocs<RelTy> rels) {
16711671
}
16721672

16731673
// Sort relocations by offset for more efficient searching for
1674-
// R_RISCV_PCREL_HI20 and R_PPC64_ADDR64.
1674+
// R_RISCV_PCREL_HI20, R_PPC64_ADDR64 and the branch-to-branch optimization.
16751675
if (ctx.arg.emachine == EM_RISCV ||
1676-
(ctx.arg.emachine == EM_PPC64 && sec->name == ".toc"))
1676+
(ctx.arg.emachine == EM_PPC64 && sec->name == ".toc") ||
1677+
ctx.arg.branchToBranch)
16771678
llvm::stable_sort(sec->relocs(),
16781679
[](const Relocation &lhs, const Relocation &rhs) {
16791680
return lhs.offset < rhs.offset;
@@ -1964,6 +1965,9 @@ void elf::postScanRelocations(Ctx &ctx) {
19641965
for (ELFFileBase *file : ctx.objectFiles)
19651966
for (Symbol *sym : file->getLocalSymbols())
19661967
fn(*sym);
1968+
1969+
if (ctx.arg.branchToBranch)
1970+
ctx.target->applyBranchToBranchOpt();
19671971
}
19681972

19691973
static bool mergeCmp(const InputSection *a, const InputSection *b) {

lld/ELF/Target.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ class TargetInfo {
101101

102102
virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
103103
JumpModType val) const {}
104+
virtual void applyBranchToBranchOpt() const {}
104105

105106
virtual ~TargetInfo();
106107

lld/docs/ld.lld.1

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ Bind default visibility defined STB_GLOBAL function symbols locally for
9393
.Fl shared.
9494
.It Fl --be8
9595
Write a Big Endian ELF File using BE8 format(AArch32 only)
96+
.It Fl -branch-to-branch
97+
Enable the branch-to-branch optimization: a branch whose target is
98+
another branch instruction is rewritten to point to the latter branch's
99+
target (AArch64 and X86_64 only). Enabled by default at -O2.
96100
.It Fl -build-id Ns = Ns Ar value
97101
Generate a build ID note.
98102
.Ar value
@@ -414,7 +418,7 @@ If not specified,
414418
.Dv a.out
415419
is used as a default.
416420
.It Fl O Ns Ar value
417-
Optimize output file size.
421+
Optimize output file.
418422
.Ar value
419423
may be:
420424
.Pp
@@ -424,7 +428,7 @@ Disable string merging.
424428
.It Cm 1
425429
Enable string merging.
426430
.It Cm 2
427-
Enable string tail merging.
431+
Enable string tail merging and branch-to-branch optimization.
428432
.El
429433
.Pp
430434
.Fl O Ns Cm 1
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# REQUIRES: aarch64
2+
3+
## Test that the branch-to-branch optimization follows the links
4+
## from f1 -> f2 -> f3 and updates all references to point to f3.
5+
6+
# RUN: llvm-mc -filetype=obj -triple=aarch64-pc-linux %s -o %t.o
7+
# RUN: ld.lld %t.o -o %t --branch-to-branch
8+
# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
9+
# RUN: ld.lld %t.o -o %t -O2
10+
# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=B2B %s
11+
12+
## Test that branch-to-branch is disabled by default.
13+
14+
# RUN: ld.lld %t.o -o %t
15+
# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
16+
# RUN: ld.lld %t.o -o %t -O2 --no-branch-to-branch
17+
# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
18+
19+
## Test that branch-to-branch is disabled for preemptible symbols.
20+
21+
# RUN: ld.lld %t.o -o %t --branch-to-branch -shared
22+
# RUN: llvm-objdump -d -s %t | FileCheck --check-prefix=NOB2B %s
23+
24+
.section .rodata.vtable,"a"
25+
.globl vtable
26+
vtable:
27+
# B2B: Contents of section .rodata:
28+
# B2B-NEXT: [[VF:[0-9a-f]{8}]]
29+
.4byte f1@PLT - vtable
30+
# B2B-SAME: [[VF]]
31+
.4byte f2@PLT - vtable
32+
# B2B-SAME: [[VF]]
33+
.4byte f3@PLT - vtable
34+
35+
.section .text._start,"ax"
36+
.globl _start
37+
_start:
38+
# B2B: bl {{.*}} <f3>
39+
# NOB2B: bl {{.*}} <f1{{.*}}>
40+
bl f1
41+
# B2B: b {{.*}} <f3>
42+
# NOB2B: b {{.*}} <f2{{.*}}>
43+
b f2
44+
45+
.section .text.f1,"ax"
46+
.globl f1
47+
f1:
48+
b f2
49+
50+
.section .text.f2,"ax"
51+
.globl f2
52+
f2:
53+
b f3
54+
55+
.section .text.f3,"ax"
56+
.globl f3
57+
f3:
58+
ret

0 commit comments

Comments
 (0)