Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
277 changes: 277 additions & 0 deletions lld/ELF/Arch/RISCV.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ class RISCV final : public TargetInfo {
void scanSectionImpl(InputSectionBase &, Relocs<RelTy>);
template <class ELFT> void scanSection1(InputSectionBase &);
void scanSection(InputSectionBase &) override;
void writeTableJumpHeader(uint8_t *buf) const override;
void writeTableJumpEntry(uint8_t *buf, const uint64_t symbol) const override;
RelType getDynRel(RelType type) const override;
RelExpr getRelExpr(RelType type, const Symbol &s,
const uint8_t *loc) const override;
Expand Down Expand Up @@ -75,6 +77,7 @@ class RISCV final : public TargetInfo {
#define INTERNAL_R_RISCV_GPREL_S 257
#define INTERNAL_R_RISCV_X0REL_I 258
#define INTERNAL_R_RISCV_X0REL_S 259
#define INTERNAL_R_RISCV_TBJAL 260

const uint64_t dtpOffset = 0x800;

Expand Down Expand Up @@ -274,6 +277,20 @@ void RISCV::writePlt(uint8_t *buf, const Symbol &sym,
write32le(buf + 12, itype(ADDI, 0, 0, 0));
}

// Writes the table-jump header word at `buf`: the address returned by
// ctx.mainPart->dynamic->getVA(), stored little-endian. The word is 8 bytes
// wide when ctx.arg.is64 is set and 4 bytes wide (truncated) otherwise.
void RISCV::writeTableJumpHeader(uint8_t *buf) const {
  const uint64_t dynamicVA = ctx.mainPart->dynamic->getVA();
  if (!ctx.arg.is64) {
    write32le(buf, dynamicVA);
    return;
  }
  write64le(buf, dynamicVA);
}

// Stores a single table entry: `address` is written little-endian at `buf`,
// as a 64-bit word when ctx.arg.is64 is set and as a (truncated) 32-bit word
// otherwise.
void RISCV::writeTableJumpEntry(uint8_t *buf, const uint64_t address) const {
  if (!ctx.arg.is64) {
    write32le(buf, address);
    return;
  }
  write64le(buf, address);
}

RelType RISCV::getDynRel(RelType type) const {
return type == ctx.target->symbolicRel ? type
: static_cast<RelType>(R_RISCV_NONE);
Expand Down Expand Up @@ -496,6 +513,9 @@ void RISCV::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
return;
}

case INTERNAL_R_RISCV_TBJAL:
return;

case R_RISCV_ADD8:
*loc += val;
return;
Expand Down Expand Up @@ -745,6 +765,32 @@ void elf::initSymbolAnchors(Ctx &ctx) {
}
}

// Try to turn the jump/call at relocation `i` of `sec` into a 2-byte table
// jump (cm.jt when the link register is x0, cm.jalt when it is ra).
//
// Returns true and sets `remove` (2 for R_RISCV_JAL, 6 for the other
// relocation types handled by the callers) when the target symbol has a slot
// in the finalized table-jump section; returns false, leaving `remove`
// untouched, otherwise. `loc` is accepted for symmetry with the other relax*
// helpers and is not used here.
static bool relaxTableJump(Ctx &ctx, const InputSection &sec, size_t i,
                           uint64_t loc, Relocation &r, uint32_t &remove) {
  auto *table = ctx.in.riscvTableJumpSection.get();
  if (!table || !table->isFinalized)
    return false;

  // For R_RISCV_JAL the relocated instruction itself carries rd; for the
  // other relocation types rd is read from the instruction 4 bytes further
  // on.
  const bool isJal = r.type == R_RISCV_JAL;
  const auto *insn =
      sec.contentMaybeDecompress().data() + r.offset + (isJal ? 0 : 4);
  const uint8_t rd = extractBits(read32le(insn), 11, 7);

  int entryIdx = -1;
  if (rd == X_X0)
    entryIdx = table->getCMJTEntryIndex(r.sym);
  else if (rd == X_RA)
    entryIdx = table->getCMJALTEntryIndex(r.sym);

  // No slot for this symbol (or an unsupported link register): not relaxed.
  if (entryIdx < 0)
    return false;

  sec.relaxAux->relocTypes[i] = INTERNAL_R_RISCV_TBJAL;
  // cm.jt or cm.jalt: the table index goes into the bits above the low two.
  sec.relaxAux->writes.push_back(0xA002 | (entryIdx << 2));
  remove = isJal ? 2 : 6;
  return true;
}

// Relax R_RISCV_CALL/R_RISCV_CALL_PLT auipc+jalr to c.j, c.jal, or jal.
static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
Relocation &r, uint32_t &remove) {
Expand All @@ -767,6 +813,8 @@ static void relaxCall(Ctx &ctx, const InputSection &sec, size_t i, uint64_t loc,
sec.relaxAux->relocTypes[i] = R_RISCV_RVC_JUMP;
sec.relaxAux->writes.push_back(0x2001); // c.jal
remove = 6;
} else if (remove >= 6 && relaxTableJump(ctx, sec, i, loc, r, remove)) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should `remove >= 6` be replaced with `remove >= (r.type == R_RISCV_JAL ? 2 : 6)`? That would match the values relaxTableJump can set `remove` to.

// relaxTableJump sets remove
} else if (remove >= 4 && isInt<21>(displace)) {
sec.relaxAux->relocTypes[i] = R_RISCV_JAL;
sec.relaxAux->writes.push_back(0x6f | rd << 7); // jal
Expand Down Expand Up @@ -890,6 +938,11 @@ static bool relax(Ctx &ctx, int pass, InputSection &sec) {
relaxCall(ctx, sec, i, loc, r, remove);
}
break;
case R_RISCV_JAL:
if (relaxable(relocs, i)) {
relaxTableJump(ctx, sec, i, loc, r, remove);
}
break;
case R_RISCV_TPREL_HI20:
case R_RISCV_TPREL_ADD:
case R_RISCV_TPREL_LO12_I:
Expand Down Expand Up @@ -1144,6 +1197,12 @@ void RISCV::finalizeRelax(int passes) const {
case INTERNAL_R_RISCV_X0REL_I:
case INTERNAL_R_RISCV_X0REL_S:
break;
case INTERNAL_R_RISCV_TBJAL:
assert(ctx.arg.relaxTbljal);
assert((aux.writes[writesIdx] & 0xfc03) == 0xA002);
skip = 2;
write16le(p, aux.writes[writesIdx++]);
break;
case R_RISCV_RELAX:
// Used by relaxTlsLe to indicate the relocation is ignored.
break;
Expand All @@ -1155,6 +1214,8 @@ void RISCV::finalizeRelax(int passes) const {
skip = 4;
write32le(p, aux.writes[writesIdx++]);
break;
case R_RISCV_64:
Copy link

Copilot AI Nov 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing comment explaining why R_RISCV_64 case is added here. Unlike the R_RISCV_32 case below it which has a clear explanatory comment, this case has no documentation of its purpose or when it's used.

Suggested change
case R_RISCV_64:
case R_RISCV_64:
// No action required for R_RISCV_64; present to suppress handling in relocateAlloc,
// similar to R_RISCV_32, but no data needs to be written here.

Copilot uses AI. Check for mistakes.
break;
case R_RISCV_32:
// Used by relaxTlsLe to write a uint32_t then suppress the handling
// in relocateAlloc.
Expand Down Expand Up @@ -1533,3 +1594,219 @@ template <class ELFT> void RISCV::scanSection1(InputSectionBase &sec) {
void RISCV::scanSection(InputSectionBase &sec) {
invokeELFT(scanSection1, sec);
}

// Creates the synthetic ".riscv.jvt" section that holds the table-jump
// entries. It is registered as SHT_PROGBITS with SHF_ALLOC | SHF_EXECINSTR
// flags and an alignment of `tableAlign`.
TableJumpSection::TableJumpSection(Ctx &ctx)
: SyntheticSection(ctx, ".riscv.jvt", SHT_PROGBITS,
SHF_ALLOC | SHF_EXECINSTR, tableAlign) {}

// Credits `csReduction` bytes of potential code-size saving to `symbol` in
// the cm.jt candidate map, creating the candidate on first use.
void TableJumpSection::addCMJTEntryCandidate(const Symbol *symbol,
                                             int csReduction) {
  CMJTEntryCandidates[symbol] += csReduction;
}

// Returns the table index of `symbol` among the finalized cm.jt entries
// (offset by startCMJTEntryIdx), or -1 when the symbol has no cm.jt slot.
int TableJumpSection::getCMJTEntryIndex(const Symbol *symbol) {
  const uint32_t pos =
      getIndex(symbol, maxCMJTEntrySize, finalizedCMJTEntries);
  if (pos >= finalizedCMJTEntries.size())
    return -1;
  return static_cast<int>(startCMJTEntryIdx + pos);
}

// Credits `csReduction` bytes of potential code-size saving to `symbol` in
// the cm.jalt candidate map, creating the candidate on first use.
void TableJumpSection::addCMJALTEntryCandidate(const Symbol *symbol,
                                               int csReduction) {
  CMJALTEntryCandidates[symbol] += csReduction;
}

// Returns the table index of `symbol` among the finalized cm.jalt entries
// (offset by startCMJALTEntryIdx), or -1 when the symbol has no cm.jalt slot.
int TableJumpSection::getCMJALTEntryIndex(const Symbol *symbol) {
  const uint32_t pos =
      getIndex(symbol, maxCMJALTEntrySize, finalizedCMJALTEntries);
  if (pos >= finalizedCMJALTEntries.size())
    return -1;
  return static_cast<int>(startCMJALTEntryIdx + pos);
}

// Accumulates `csReduction` onto the running total stored for `symbol` in
// `entriesList`. A symbol seen for the first time starts from zero.
void TableJumpSection::addEntry(
    const Symbol *symbol, llvm::DenseMap<const Symbol *, int> &entriesList,
    int csReduction) {
  int &accumulated = entriesList[symbol];
  accumulated += csReduction;
}

// Linear search for `symbol` in the ordered list of finalized entries.
// Returns its position, or entriesList.size() when the symbol is absent.
// `maxSize` is only used to assert that the list respects its capacity.
uint32_t TableJumpSection::getIndex(
    const Symbol *symbol, uint32_t maxSize,
    SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
        &entriesList) {
  assert(maxSize >= entriesList.size() &&
         "Finalized vector of entries exceeds maximum");
  uint32_t pos = 0;
  for (const auto &entry : entriesList) {
    if (entry.first == symbol)
      return pos;
    ++pos;
  }
  // Not found: `pos` now equals the number of entries.
  return pos;
}

// Walks the relocations of `sec` and records every relaxable jump or call to
// a Defined symbol as a table-jump candidate. A relocation qualifies when it
// is R_RISCV_JAL, R_RISCV_CALL or R_RISCV_CALL_PLT and is immediately
// followed by an R_RISCV_RELAX relocation. Jumps whose link register is x0
// become cm.jt candidates, those linking through ra become cm.jalt
// candidates; any other link register is ignored.
void TableJumpSection::scanTableJumpEntries(const InputSection &sec) const {
  for (auto [idx, rel] : llvm::enumerate(sec.relocations)) {
    if (!dyn_cast<Defined>(rel.sym))
      continue;

    // Only relocations paired with a following R_RISCV_RELAX may be relaxed.
    if (idx + 1 == sec.relocs().size() ||
        sec.relocs()[idx + 1].type != R_RISCV_RELAX)
      continue;

    const bool isJal = rel.type == R_RISCV_JAL;
    if (!isJal && rel.type != R_RISCV_CALL && rel.type != R_RISCV_CALL_PLT)
      continue;

    // rd lives in the jal itself, or in the second instruction of the pair
    // for the call relocations.
    const uint32_t insn = read32le(sec.contentMaybeDecompress().data() +
                                   rel.offset + (isJal ? 0 : 4));
    const uint8_t rd = extractBits(insn, 11, 7);

    // Already relaxed to a compressed jump: nothing left to gain.
    const auto relaxedType = sec.relaxAux->relocTypes[idx];
    if (relaxedType == R_RISCV_RVC_JUMP)
      continue;
    // NOTE(review): an original R_RISCV_JAL relocation whose relaxed type is
    // not R_RISCV_JAL is credited 6 bytes here, while relaxTableJump only
    // removes 2 bytes for it — confirm this is intended.
    const int csReduction = relaxedType == R_RISCV_JAL ? 2 : 6;

    if (rd == 0)
      ctx.in.riscvTableJumpSection->addCMJTEntryCandidate(rel.sym,
                                                          csReduction);
    else if (rd == X_RA)
      ctx.in.riscvTableJumpSection->addCMJALTEntryCandidate(rel.sym,
                                                            csReduction);
  }
}

void TableJumpSection::finalizeContents() {
if (isFinalized)
return;
isFinalized = true;

finalizedCMJTEntries = finalizeEntry(CMJTEntryCandidates, maxCMJTEntrySize);
CMJTEntryCandidates.clear();
int32_t CMJTSizeReduction = getSizeReduction();
finalizedCMJALTEntries =
finalizeEntry(CMJALTEntryCandidates, maxCMJALTEntrySize);
CMJALTEntryCandidates.clear();

if (!finalizedCMJALTEntries.empty() &&
getSizeReduction() < CMJTSizeReduction) {
// In memory, the cm.jt table occupies the first 0x20 entries.
// To be able to use the cm.jalt table which comes afterwards
// it is necessary to pad out the cm.jt table.
// Remove cm.jalt entries if the code reduction of cm.jalt is
// smaller than the size of the padding.
finalizedCMJALTEntries.clear();
}
// if table jump still got negative effect, give up.
if (getSizeReduction() <= 0) {
warn("Table Jump Relaxation didn't got any reduction for code size.");
Copy link

Copilot AI Nov 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Corrected grammar: 'didn't got' should be 'didn't get' or 'did not get'.

Suggested change
warn("Table Jump Relaxation didn't got any reduction for code size.");
warn("Table Jump Relaxation didn't get any reduction for code size.");

Copilot uses AI. Check for mistakes.
finalizedCMJTEntries.clear();
}
}

// Sort the map in decreasing order of the amount of code reduction provided
// by the entries. Drop any entries that can't fit in the map from the tail
// end since they provide less code reduction. Drop any entries that cause
// an increase in code size (i.e. the reduction from instruction conversion
// does not cover the code size gain from adding a table entry).
//
// `EntryMap` maps each candidate symbol to its accumulated byte saving and
// `maxSize` is the capacity of the corresponding table region. Returns the
// surviving entries, best first.
//
// NOTE(review): candidates with equal savings keep whatever relative order
// the DenseMap iteration and std::sort happen to produce; if the table layout
// must be reproducible across runs, a deterministic tie-breaker is needed.
SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
TableJumpSection::finalizeEntry(llvm::DenseMap<const Symbol *, int> EntryMap,
                                uint32_t maxSize) {
  SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0> entries(
      EntryMap.begin(), EntryMap.end());

  std::sort(entries.begin(), entries.end(),
            [](const llvm::detail::DenseMapPair<const Symbol *, int> &lhs,
               const llvm::detail::DenseMapPair<const Symbol *, int> &rhs) {
              return lhs.second > rhs.second;
            });

  // Only ever shrink: growing the vector would append null placeholder
  // entries that the loop below would have to pop again one by one.
  if (entries.size() > maxSize)
    entries.resize(maxSize);

  // Drop any items that have a negative effect (i.e. increase code size):
  // an entry must save at least the word it occupies in the table.
  while (!entries.empty() && entries.back().second < ctx.arg.wordsize)
    entries.pop_back();
  return entries;
}

// Size of the section in bytes. The table extends to the end of the cm.jalt
// region when any cm.jalt entry (finalized) or candidate (not yet finalized)
// exists, and only to the end of the cm.jt entries otherwise. Each slot is
// ctx.arg.wordsize bytes.
size_t TableJumpSection::getSize() const {
  const auto wordSize = ctx.arg.wordsize;

  if (!isFinalized) {
    // Before finalization the estimate is based on the candidate maps.
    if (CMJALTEntryCandidates.empty())
      return (startCMJTEntryIdx + CMJTEntryCandidates.size()) * wordSize;
    return (startCMJALTEntryIdx + CMJALTEntryCandidates.size()) * wordSize;
  }

  if (finalizedCMJALTEntries.empty())
    return (startCMJTEntryIdx + finalizedCMJTEntries.size()) * wordSize;
  return (startCMJALTEntryIdx + finalizedCMJALTEntries.size()) * wordSize;
}

// Net code-size change, in bytes, from using the table: the savings recorded
// for every finalized cm.jt and cm.jalt entry minus the current size of the
// table itself (getSize()). A result <= 0 means the table does not pay for
// itself.
int32_t TableJumpSection::getSizeReduction() {
  // Start from the cost of the table, then add back each entry's saving.
  int32_t net = -getSize();
  const auto addSavings = [&net](const auto &entries) {
    for (const auto &entry : entries)
      net += entry.second;
  };
  addSavings(finalizedCMJTEntries);
  addSavings(finalizedCMJALTEntries);
  return net;
}

void TableJumpSection::writeTo(uint8_t *buf) {
if (getSizeReduction() <= 0)
return;
ctx.target->writeTableJumpHeader(buf);
writeEntries(buf + startCMJTEntryIdx * ctx.arg.wordsize,
finalizedCMJTEntries);
if (finalizedCMJALTEntries.size() > 0) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (finalizedCMJALTEntries.size() > 0) {
if (!finalizedCMJALTEntries.empty()) {

padWords(buf + ((startCMJTEntryIdx + finalizedCMJTEntries.size()) *
ctx.arg.wordsize),
startCMJALTEntryIdx);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't the number of padding words be based startCMJALTEntryIdx - finalizedCMJTEntries.size()?

writeEntries(buf + (startCMJALTEntryIdx * ctx.arg.wordsize),
finalizedCMJALTEntries);
}
}

void TableJumpSection::padWords(uint8_t *buf, const uint8_t maxWordCount) {
for (size_t i = 0; i < maxWordCount; ++i) {
if (ctx.arg.is64)
write64le(buf + i, 0);
else
write32le(buf + i, 0);
Comment on lines +1793 to +1795
Copy link

Copilot AI Nov 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The buffer offset calculation is incorrect. The loop should multiply i by ctx.arg.wordsize (either 8 for 64-bit or 4 for 32-bit). Currently buf + i only increments by single bytes instead of word-sized chunks, causing padding words to overlap and only the first byte of each word to be written.

Suggested change
write64le(buf + i, 0);
else
write32le(buf + i, 0);
write64le(buf + i * ctx.arg.wordsize, 0);
else
write32le(buf + i * ctx.arg.wordsize, 0);

Copilot uses AI. Check for mistakes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, this does look weird.

}
}

// Writes one table slot per element of `entriesList`, starting at `buf` and
// advancing by ctx.arg.wordsize bytes per element. Each slot receives the
// address of the entry's symbol.
//
// NOTE(review): the list is only read here, so the parameter could become a
// `const SmallVectorImpl<...> &` once the declaration is updated to match.
void TableJumpSection::writeEntries(
    uint8_t *buf,
    SmallVector<llvm::detail::DenseMapPair<const Symbol *, int>, 0>
        &entriesList) {
  for (const auto &entry : entriesList) {
    assert(entry.second > 0);
    // The slot position must track the element position, because getIndex()
    // hands out indices by position in this list. A symbol that is no longer
    // defined therefore leaves its slot unwritten instead of shifting every
    // later entry down by one.
    if (entry.first->isDefined())
      ctx.target->writeTableJumpEntry(buf, entry.first->getVA(ctx, 0));
    buf += ctx.arg.wordsize;
  }
}
3 changes: 3 additions & 0 deletions lld/ELF/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class MipsGotSection;
class MipsRldMapSection;
class PPC32Got2Section;
class PPC64LongBranchTargetSection;
class TableJumpSection;
class PltSection;
class RelocationBaseSection;
class RelroPaddingSection;
Expand Down Expand Up @@ -370,6 +371,7 @@ struct Config {
bool resolveGroups;
bool relrGlibc = false;
bool relrPackDynRelocs = false;
bool relaxTbljal;
llvm::DenseSet<llvm::StringRef> saveTempsArgs;
llvm::SmallVector<std::pair<llvm::GlobPattern, uint32_t>, 0> shuffleSections;
bool singleRoRx;
Expand Down Expand Up @@ -582,6 +584,7 @@ struct InStruct {
std::unique_ptr<RelroPaddingSection> relroPadding;
std::unique_ptr<SyntheticSection> armCmseSGSection;
std::unique_ptr<PPC64LongBranchTargetSection> ppc64LongBranchTarget;
std::unique_ptr<TableJumpSection> riscvTableJumpSection;
std::unique_ptr<SyntheticSection> mipsAbiFlags;
std::unique_ptr<MipsGotSection> mipsGot;
std::unique_ptr<SyntheticSection> mipsOptions;
Expand Down
1 change: 1 addition & 0 deletions lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1623,6 +1623,7 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
}
ctx.arg.zCombreloc = getZFlag(args, "combreloc", "nocombreloc", true);
ctx.arg.zCopyreloc = getZFlag(args, "copyreloc", "nocopyreloc", true);
ctx.arg.relaxTbljal = args.hasArg(OPT_relax_tbljal);
ctx.arg.zForceBti = hasZOption(args, "force-bti");
ctx.arg.zForceIbt = hasZOption(args, "force-ibt");
ctx.arg.zZicfilp = getZZicfilp(ctx, args);
Expand Down
5 changes: 5 additions & 0 deletions lld/ELF/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,11 @@ defm use_android_relr_tags: BB<"use-android-relr-tags",
"Use SHT_ANDROID_RELR / DT_ANDROID_RELR* tags instead of SHT_RELR / DT_RELR*",
"Use SHT_RELR / DT_RELR* tags (default)">;

def relax_tbljal : FF<"relax-tbljal">,
HelpText<"Enable conversion of call instructions to table "
"jump instruction from the Zcmt extension for "
"frequently called functions (RISC-V only)">;

def pic_veneer: F<"pic-veneer">,
HelpText<"Always generate position independent thunks (veneers)">;

Expand Down
Loading