Skip to content
Merged
33 changes: 33 additions & 0 deletions lld/ELF/Arch/AArch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,39 @@ uint64_t elf::getAArch64Page(uint64_t expr) {
return expr & ~static_cast<uint64_t>(0xFFF);
}

// A BTI landing pad is a valid target for an indirect branch
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reflow the comment after adopting 80-column wrap?

// when the Branch Target Identification has been enabled.
// As linker generated branches are via x16 the
// BTI landing pads are defined as:
// BTI C, BTI J, BTI JC, PACIASP, PACIBSP.
//
// s is the branch destination symbol and a is the addend applied to it.
// Returns false only when the instruction at the destination could be read
// and is not one of the landing pads listed above. Every destination that
// cannot be inspected is reported as a landing pad, because whoever provides
// it is responsible for making it one.
bool elf::isAArch64BTILandingPad(Symbol &s, int64_t a) {
  // PLT entries accessed indirectly have a BTI c.
  if (s.isInPlt())
    return true;

  // dyn_cast yields null for a symbol that is not Defined; such a symbol has
  // no section contents to disassemble, so treat it like any other
  // destination we cannot inspect instead of dereferencing null.
  auto *d = dyn_cast<Defined>(&s);
  if (!d || !isa_and_nonnull<InputSection>(d->section))
    // All places that we cannot disassemble are responsible for making
    // the target a BTI landing pad.
    return true;

  auto *isec = cast<InputSection>(d->section);
  const uint64_t size = isec->getSize();
  const uint64_t off = d->value + a;
  // Likely user error, but protect ourselves against out of bounds access.
  // A full 4-byte instruction is read below, so all 4 bytes must lie inside
  // the section. The check is written as a subtraction on the size so that
  // a huge (wrapped) offset cannot overflow the comparison.
  if (size < 4 || off > size - 4)
    return true;

  const uint8_t *buf = isec->content().begin();
  const uint32_t instr = read32le(buf + off);
  // All BTI instructions are HINT instructions which all have same encoding
  // apart from bits [11:5]. Reject anything that is not a HINT first.
  if ((instr & 0xd503201f) != 0xd503201f)
    return false;
  return is_contained({/*PACIASP*/ 0xd503233f, /*PACIBSP*/ 0xd503237f,
                       /*BTI C*/ 0xd503245f, /*BTI J*/ 0xd503249f,
                       /*BTI JC*/ 0xd50324df},
                      instr);
}

namespace {
class AArch64 : public TargetInfo {
public:
Expand Down
23 changes: 23 additions & 0 deletions lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2265,6 +2265,15 @@ std::pair<Thunk *, bool> ThunkCreator::getThunk(InputSection *isec,
return std::make_pair(t, true);
}

// Return the landing pad Thunk for the destination identified by d's section
// and value together with addend a, creating it on the first request.
// The bool is true only when this call created the Thunk, in which case the
// caller still has to add it to a ThunkSection. Later requests for the same
// destination get the cached Thunk and false.
std::pair<Thunk *, bool> ThunkCreator::getSyntheticLandingPad(Defined &d,
                                                              int64_t a) {
  // Reserve the map slot first so the Thunk is only built when it is new.
  auto [it, isNew] = landingPadsBySectionAndAddend.try_emplace(
      {{d.section, d.value}, a}, nullptr);
  if (isNew)
    it->second = addLandingPadThunk(ctx, d, a);
  return {it->second, isNew};
}

// Return true if the relocation target is an in range Thunk.
// Return false if the relocation is not to a Thunk. If the relocation target
// was originally to a Thunk, but is no longer in range we revert the
Expand Down Expand Up @@ -2348,6 +2357,20 @@ bool ThunkCreator::createThunks(uint32_t pass,
ts = getISDThunkSec(os, isec, isd, rel, src);
ts->addThunk(t);
thunks[t->getThunkTargetSym()] = t;

// When indirect branches are restricted, such as with AArch64 BTI,
// a Thunk may need to target a linker generated landing pad
// instead of the original destination.
if (t->needsSyntheticLandingPad()) {
Thunk *lpt;
auto &dr = cast<Defined>(t->destination);
std::tie(lpt, isNew) = getSyntheticLandingPad(dr, t->addend);
if (isNew) {
ts = getISThunkSec(cast<InputSection>(dr.section));
ts->addThunk(lpt);
}
t->landingPad = lpt->getThunkTargetSym();
}
}

// Redirect relocation to Thunk, we never go via the PLT to a Thunk
Expand Down
14 changes: 13 additions & 1 deletion lld/ELF/Relocations.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

namespace lld::elf {
struct Ctx;
class Defined;
class Symbol;
class InputSection;
class InputSectionBase;
Expand Down Expand Up @@ -175,6 +176,8 @@ class ThunkCreator {
std::pair<Thunk *, bool> getThunk(InputSection *isec, Relocation &rel,
uint64_t src);

std::pair<Thunk *, bool> getSyntheticLandingPad(Defined &d, int64_t a);

ThunkSection *addThunkSection(OutputSection *os, InputSectionDescription *,
uint64_t off);

Expand All @@ -201,9 +204,18 @@ class ThunkCreator {
// Track InputSections that have an inline ThunkSection placed in front
// an inline ThunkSection may have control fall through to the section below
// so we need to make sure that there is only one of them.
// The Mips LA25 Thunk is an example of an inline ThunkSection.
// The Mips LA25 Thunk is an example of an inline ThunkSection, as is
// the AArch64BTILandingPadThunk.
llvm::DenseMap<InputSection *, ThunkSection *> thunkedSections;

// Record landing pads, generated for a section + offset destination.
// Landing pads are alternative entry points for destinations that need
// to be reached via thunks that use indirect branches. A destination
// needs at most one landing pad as that can be reused by all callers.
llvm::DenseMap<std::pair<std::pair<SectionBase *, uint64_t>, int64_t>,
Thunk *>
landingPadsBySectionAndAddend;

// The number of completed passes of createThunks this permits us
// to do one time initialization on Pass 0 and put a limit on the
// number of times it can be called to prevent infinite loops.
Expand Down
1 change: 1 addition & 0 deletions lld/ELF/Target.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ void writePrefixedInstruction(uint8_t *loc, uint64_t insn);
void addPPC64SaveRestore();
uint64_t getPPC64TocBase();
uint64_t getAArch64Page(uint64_t expr);
bool isAArch64BTILandingPad(Symbol &s, int64_t a);
template <typename ELFT> void writeARMCmseImportLib();
uint64_t getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type);
void riscvFinalizeRelax(int passes);
Expand Down
118 changes: 107 additions & 11 deletions lld/ELF/Thunks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,13 +51,20 @@ namespace {
// distance from the thunk to the target is less than 128MB. Long thunks can
// branch to any virtual address and they are implemented in the derived
// classes. This class tries to create a short thunk if the target is in range,
// otherwise it creates a long thunk.
// otherwise it creates a long thunk. When BTI is enabled indirect branches
// must land on a BTI instruction. If the destination does not have a BTI
// instruction mayNeedLandingPad is set to true and Thunk::landingPad points
// to an alternative entry point with a BTI.
class AArch64Thunk : public Thunk {
public:
AArch64Thunk(Ctx &ctx, Symbol &dest, int64_t addend)
: Thunk(ctx, dest, addend) {}
AArch64Thunk(Ctx &ctx, Symbol &dest, int64_t addend, bool mayNeedLandingPad)
: Thunk(ctx, dest, addend), mayNeedLandingPad(mayNeedLandingPad) {}
bool getMayUseShortThunk();
void writeTo(uint8_t *buf) override;
bool needsSyntheticLandingPad() override;

protected:
bool mayNeedLandingPad;

private:
bool mayUseShortThunk = true;
Expand All @@ -67,8 +74,9 @@ class AArch64Thunk : public Thunk {
// AArch64 long range Thunks.
class AArch64ABSLongThunk final : public AArch64Thunk {
public:
AArch64ABSLongThunk(Ctx &ctx, Symbol &dest, int64_t addend)
: AArch64Thunk(ctx, dest, addend) {}
AArch64ABSLongThunk(Ctx &ctx, Symbol &dest, int64_t addend,
bool mayNeedLandingPad)
: AArch64Thunk(ctx, dest, addend, mayNeedLandingPad) {}
uint32_t size() override { return getMayUseShortThunk() ? 4 : 16; }
void addSymbols(ThunkSection &isec) override;

Expand All @@ -78,15 +86,36 @@ class AArch64ABSLongThunk final : public AArch64Thunk {

class AArch64ADRPThunk final : public AArch64Thunk {
public:
AArch64ADRPThunk(Ctx &ctx, Symbol &dest, int64_t addend)
: AArch64Thunk(ctx, dest, addend) {}
AArch64ADRPThunk(Ctx &ctx, Symbol &dest, int64_t addend,
bool mayNeedLandingPad)
: AArch64Thunk(ctx, dest, addend, mayNeedLandingPad) {}
uint32_t size() override { return getMayUseShortThunk() ? 4 : 12; }
void addSymbols(ThunkSection &isec) override;

private:
void writeLong(uint8_t *buf) override;
};

// AArch64 BTI landing pad Thunk.
// With BTI enabled, an indirect branch has to arrive at a BTI compatible
// instruction. If a destination lacks one, the Thunk performing the indirect
// branch is pointed at a landing pad Thunk instead. The landing pad starts
// with BTI c and then reaches the real destination either by falling through
// (short form, 4 bytes) or with a direct branch (long form, 8 bytes).
class AArch64BTILandingPadThunk final : public Thunk {
public:
  AArch64BTILandingPadThunk(Ctx &ctx, Symbol &dest, int64_t addend)
      : Thunk(ctx, dest, addend) {}

  void addSymbols(ThunkSection &isec) override;
  void writeTo(uint8_t *buf) override;

  // Size in bytes of the form that will currently be written.
  uint32_t size() override {
    if (getMayUseShortThunk())
      return 4;
    return 8;
  }

private:
  // Stays false once the short form has been ruled out.
  bool mayUseShortThunk = true;

  bool getMayUseShortThunk();
  void writeLong(uint8_t *buf);
};

// Base class for ARM thunks.
//
// An ARM thunk may be either short or long. A short thunk is simply a branch
Expand Down Expand Up @@ -545,6 +574,12 @@ void AArch64Thunk::writeTo(uint8_t *buf) {
ctx.target->relocateNoSym(buf, R_AARCH64_CALL26, s - p);
}

// Report whether this Thunk has to be redirected to a synthetic landing pad.
// That is the case only when the destination may lack a landing pad and the
// Thunk cannot be written in its short form.
bool AArch64Thunk::needsSyntheticLandingPad() {
  // Destination was not flagged as possibly needing a landing pad; the short
  // thunk query is deliberately not made in this case.
  if (!mayNeedLandingPad)
    return false;
  // Short Thunks use a direct branch, no synthetic landing pad required.
  const bool usesShortForm = getMayUseShortThunk();
  return !usesShortForm;
}

// AArch64 long range Thunks.
void AArch64ABSLongThunk::writeLong(uint8_t *buf) {
const uint8_t data[] = {
Expand All @@ -553,7 +588,11 @@ void AArch64ABSLongThunk::writeLong(uint8_t *buf) {
0x00, 0x00, 0x00, 0x00, // L0: .xword S
0x00, 0x00, 0x00, 0x00,
};
uint64_t s = getAArch64ThunkDestVA(destination, addend);
// If mayNeedLandingPad is true then destination is an
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// If

// AArch64BTILandingPadThunk that defines landingPad.
assert(!mayNeedLandingPad || landingPad != nullptr);
uint64_t s = mayNeedLandingPad ? landingPad->getVA(0)
: getAArch64ThunkDestVA(destination, addend);
memcpy(buf, data, sizeof(data));
ctx.target->relocateNoSym(buf + 8, R_AARCH64_ABS64, s);
}
Expand All @@ -577,7 +616,11 @@ void AArch64ADRPThunk::writeLong(uint8_t *buf) {
0x10, 0x02, 0x00, 0x91, // add x16, x16, R_AARCH64_ADD_ABS_LO12_NC(Dest)
0x00, 0x02, 0x1f, 0xd6, // br x16
};
uint64_t s = getAArch64ThunkDestVA(destination, addend);
// If mayNeedLandingPad is true then destination is an
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

// If

// AArch64BTILandingPadThunk that defines landingPad.
assert(!mayNeedLandingPad || landingPad != nullptr);
uint64_t s = mayNeedLandingPad ? landingPad->getVA(0)
: getAArch64ThunkDestVA(destination, addend);
uint64_t p = getThunkTargetSym()->getVA();
memcpy(buf, data, sizeof(data));
ctx.target->relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
Expand All @@ -591,6 +634,47 @@ void AArch64ADRPThunk::addSymbols(ThunkSection &isec) {
addSymbol("$x", STT_NOTYPE, 0, isec);
}

// Add this landing pad's symbols to isec: an STT_FUNC symbol whose name is
// "__AArch64BTIThunk_" followed by the destination's name, and a "$x"
// STT_NOTYPE symbol.
void AArch64BTILandingPadThunk::addSymbols(ThunkSection &isec) {
  // Build the name through saver() so the concatenated string is stored
  // rather than left as a temporary.
  const auto thunkName =
      saver().save("__AArch64BTIThunk_" + destination.getName());
  addSymbol(thunkName, STT_FUNC, 0, isec);
  addSymbol("$x", STT_NOTYPE, 0, isec);
}

// Emit the landing pad into buf: a lone BTI c for the short form, otherwise
// the long form produced by writeLong().
void AArch64BTILandingPadThunk::writeTo(uint8_t *buf) {
  if (getMayUseShortThunk()) {
    // Control falls through to target in following section.
    write32(buf, 0xd503245f); // BTI c
    return;
  }
  writeLong(buf);
}

// Decide whether the landing pad can be the short, fall-through form, and
// remember the answer. Once the short form has been rejected the decision is
// not revisited on later calls.
bool AArch64BTILandingPadThunk::getMayUseShortThunk() {
  if (mayUseShortThunk) {
    // If the target is the following instruction then we can fall through
    // without needing a branch.
    const uint64_t destVA = destination.getVA(addend);
    const uint64_t thunkVA = getThunkTargetSym()->getVA();
    const uint64_t distance = destVA - thunkVA;
    // This function is called before addresses are stable. The range from
    // the thunk to the next section is needed, but the start address of the
    // next section depends on the size of the thunks in the previous pass.
    // distance + offset == 0 represents the first pass, where the Thunk and
    // the following section are assigned the same offset.
    const bool sharesOffsetWithTarget = distance + offset == 0;
    // distance <= 4 is the last Thunk in the Thunk Section.
    const bool isLastInThunkSection = distance <= 4;
    mayUseShortThunk = sharesOffsetWithTarget || isLastInThunkSection;
  }
  return mayUseShortThunk;
}

// Emit the long form of the landing pad into buf: BTI c followed by a direct
// branch to the destination.
void AArch64BTILandingPadThunk::writeLong(uint8_t *buf) {
  // The branch is the second instruction, so both its buffer location and
  // its address are 4 bytes past the start of the Thunk.
  uint8_t *const branchLoc = buf + 4;
  const uint64_t destVA = destination.getVA(addend);
  const uint64_t branchVA = getThunkTargetSym()->getVA() + 4;

  write32(buf, 0xd503245f);       // BTI c
  write32(branchLoc, 0x14000000); // B S
  ctx.target->relocateNoSym(branchLoc, R_AARCH64_CALL26, destVA - branchVA);
}

// ARM Target Thunks
static uint64_t getARMThunkDestVA(const Symbol &s) {
uint64_t v = s.isInPlt() ? s.getPltVA() : s.getVA();
Expand Down Expand Up @@ -1279,9 +1363,12 @@ static Thunk *addThunkAArch64(Ctx &ctx, RelType type, Symbol &s, int64_t a) {
if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
type != R_AARCH64_PLT32)
fatal("unrecognized relocation type");
bool mayNeedLandingPad =
(ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) &&
!isAArch64BTILandingPad(s, a);
if (ctx.arg.picThunk)
return make<AArch64ADRPThunk>(ctx, s, a);
return make<AArch64ABSLongThunk>(ctx, s, a);
return make<AArch64ADRPThunk>(ctx, s, a, mayNeedLandingPad);
return make<AArch64ABSLongThunk>(ctx, s, a, mayNeedLandingPad);
}

// Creates a thunk for long branches or Thumb-ARM interworking.
Expand Down Expand Up @@ -1495,3 +1582,12 @@ Thunk *elf::addThunk(Ctx &ctx, const InputSection &isec, Relocation &rel) {
llvm_unreachable("add Thunk only supported for ARM, AVR, Mips and PowerPC");
}
}

// Create a landing pad Thunk for destination s with addend a. Only
// EM_AARCH64 is handled; reaching this function for any other machine is
// treated as unreachable.
Thunk *elf::addLandingPadThunk(Ctx &ctx, Symbol &s, int64_t a) {
  if (ctx.arg.emachine == EM_AARCH64)
    return make<AArch64BTILandingPadThunk>(ctx, s, a);
  llvm_unreachable("add landing pad only supported for AArch64");
}
11 changes: 11 additions & 0 deletions lld/ELF/Thunks.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,18 @@ class Thunk {
return true;
}

// Thunks that indirectly branch to targets may need a synthetic landing
// pad generated close to the target. For example AArch64 when BTI is
// enabled.
// The default answer is false; a Thunk class that can require a landing pad
// overrides this to return true when one is needed.
virtual bool needsSyntheticLandingPad() { return false; }

Defined *getThunkTargetSym() const { return syms[0]; }

Ctx &ctx;
Symbol &destination;
int64_t addend;
// Alternative target when indirect branch to destination can't be used.
Symbol *landingPad = nullptr;
llvm::SmallVector<Defined *, 3> syms;
uint64_t offset = 0;
// The alignment requirement for this Thunk, defaults to the size of the
Expand All @@ -71,6 +78,10 @@ class Thunk {
// ThunkSection.
Thunk *addThunk(Ctx &, const InputSection &isec, Relocation &rel);

// Create a landing pad Thunk for use when indirect branches from Thunks
// are restricted.
Thunk *addLandingPadThunk(Ctx &, Symbol &s, int64_t a);

void writePPC32PltCallStub(Ctx &, uint8_t *buf, uint64_t gotPltVA,
const InputFile *file, int64_t addend);
void writePPC64LoadAndBranch(uint8_t *buf, int64_t offset);
Expand Down
Loading