Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
ea9fea2
Relax PCHi20Lo12.
ylzsx Dec 27, 2024
95f4540
la.pcrel relax test modify.
ylzsx Dec 24, 2024
7b133c2
Add test for PCHi20Lo12
ylzsx Dec 27, 2024
abc1a45
Add test for got symbols relaxation.
ylzsx Dec 30, 2024
1b1804e
Modify test. NFC
ylzsx Jan 15, 2025
30cb382
Add check for register.
ylzsx Jan 20, 2025
f1f995b
Relax call36/tail36.
ylzsx Dec 26, 2024
f227ae5
modify test for call36/tail36.
ylzsx Dec 27, 2024
f2aae15
Modify test. Add the option --relax.
ylzsx Jan 16, 2025
7993434
Relax TLS LE/GD/LD.
ylzsx Dec 27, 2024
1e9aa52
Add test for TLSLD/TLSGD when relax enabled.
ylzsx Dec 29, 2024
91da25e
Modify test for TLSLE when relax enabled.
ylzsx Dec 31, 2024
2066c5f
Add test for loongarch-relax-tls-le.s and modify loongarch-relax-emit…
ylzsx Dec 31, 2024
b57c40e
Modify test. Add --relax option.
ylzsx Jan 16, 2025
924d511
Fixes for reviews.
ylzsx Feb 14, 2025
b9c2ea1
Revert "Modify test. Add the option --relax."
ylzsx Feb 14, 2025
1101829
Fixes for reviews.
ylzsx Feb 14, 2025
b3900f6
Merge branch 'users/ylzsx/r-pchi20lo12' into users/ylzsx/r-call36
ylzsx Feb 14, 2025
a77197a
Merge branch 'users/ylzsx/r-call36' into users/ylzsx/r-tls-noie
ylzsx Feb 14, 2025
1192441
Revert "Modify test. Add --relax option."
ylzsx Feb 14, 2025
e627784
Remove unnecessary spaces.
ylzsx Feb 14, 2025
26c1e0c
Fixes for reviews.
ylzsx Feb 17, 2025
7dc7264
Merge branch 'main' into users/ylzsx/r-tls-noie
ylzsx Mar 9, 2025
dcf857e
Merge branch 'main' into users/ylzsx/r-tls-noie
ylzsx Mar 10, 2025
1484f94
revise indentation.
ylzsx Mar 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
220 changes: 218 additions & 2 deletions lld/ELF/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,13 @@ enum Op {
ADDI_W = 0x02800000,
ADDI_D = 0x02c00000,
ANDI = 0x03400000,
PCADDI = 0x18000000,
PCADDU12I = 0x1c000000,
LD_W = 0x28800000,
LD_D = 0x28c00000,
JIRL = 0x4c000000,
B = 0x50000000,
BL = 0x54000000,
};

enum Reg {
Expand Down Expand Up @@ -131,6 +134,10 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
}

// Return the 5-bit field held in bits [4:0] of `v` (the "D5" operand slot of
// an instruction word).
static uint32_t getD5(uint64_t v) {
  const uint32_t field = extractBits(v, 4, 0);
  return field;
}

// Return the 5-bit field held in bits [9:5] of `v` (the "J5" operand slot of
// an instruction word), shifted down so it starts at bit 0.
static uint32_t getJ5(uint64_t v) {
  const uint32_t field = extractBits(v, 9, 5);
  return field;
}

static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
uint32_t immLo = extractBits(imm, 15, 0);
uint32_t immHi = extractBits(imm, 20, 16);
Expand All @@ -147,6 +154,10 @@ static uint32_t setJ20(uint32_t insn, uint32_t imm) {
return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5);
}

// Return `insn` with bits [9:5] replaced by the low five bits of `imm`. Every
// other bit of `insn` is left untouched.
static uint32_t setJ5(uint32_t insn, uint32_t imm) {
  const uint32_t cleared = insn & 0xfffffc1f;
  const uint32_t field = extractBits(imm, 4, 0) << 5;
  return cleared | field;
}

// Return `insn` with bits [21:10] replaced by the low twelve bits of `imm`.
// Every other bit of `insn` is left untouched.
static uint32_t setK12(uint32_t insn, uint32_t imm) {
  const uint32_t cleared = insn & 0xffc003ff;
  const uint32_t field = extractBits(imm, 11, 0) << 10;
  return cleared | field;
}
Expand Down Expand Up @@ -743,6 +754,157 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
}
}

// A relocation is considered relaxable when the relocation that immediately
// follows it in `relocs` is an R_LARCH_RELAX marker. Returns false when
// `relocs[i]` is the last entry.
static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
  const size_t next = i + 1;
  if (next >= relocs.size())
    return false;
  return relocs[next].type == R_LARCH_RELAX;
}

// Check that relocs[i] and relocs[i + 2] form a relaxable pair: each one is
// followed by an R_LARCH_RELAX marker, and the second relocation sits exactly
// 4 bytes after the first. The bounds of relocs[i + 2] are validated by the
// second relaxable() call before that element is dereferenced.
static bool isPairRelaxable(ArrayRef<Relocation> relocs, size_t i) {
  if (!relaxable(relocs, i) || !relaxable(relocs, i + 2))
    return false;
  return relocs[i].offset + 4 == relocs[i + 2].offset;
}

// Relax code sequence.
// From:
//   pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym)
//   addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
// To:
//   pcaddi $a0, sym
//   (relocated as R_LARCH_PCREL20_S2 | R_LARCH_TLS_LD_PCREL20_S2 |
//    R_LARCH_TLS_GD_PCREL20_S2 respectively)
//
// From:
//   pcalau12i $a0, %got_pc_hi20(sym_got)
//   ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
// To:
//   pcaddi $a0, sym
//   (relocated as R_LARCH_PCREL20_S2)
//
// Parameters:
//   i      - index of rHi20 in the section's relocation list; rLo12 is the
//            entry at i + 2 (the entries in between are R_LARCH_RELAX).
//   loc    - address used as the PC when computing the pcaddi displacement
//            (supplied by the caller; presumably the current address of the
//            pcalau12i instruction).
//   remove - set to 4 when the pair is relaxed, i.e. the first instruction is
//            to be deleted. Left untouched otherwise.
//
// On success the relocation at i is neutralised to R_LARCH_RELAX, the one at
// i + 2 is retyped to the matching *_PCREL20_S2 relocation, and the pcaddi
// encoding is queued in sec.relaxAux->writes.
static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
                            uint64_t loc, Relocation &rHi20, Relocation &rLo12,
                            uint32_t &remove) {
  // Check if the relocations are relaxable sequences.
  if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
         rLo12.type == R_LARCH_PCALA_LO12) ||
        (rHi20.type == R_LARCH_GOT_PC_HI20 &&
         rLo12.type == R_LARCH_GOT_PC_LO12) ||
        (rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
         rLo12.type == R_LARCH_GOT_PC_LO12) ||
        (rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
         rLo12.type == R_LARCH_GOT_PC_LO12)))
    return;

  // GOT references to absolute symbols can't be relaxed to use pcaddi in
  // position-independent code, because these instructions produce a relative
  // address.
  // Also skip undefined, preemptible and STT_GNU_IFUNC symbols, because these
  // symbols may be resolved at runtime.
  if (rHi20.type == R_LARCH_GOT_PC_HI20 &&
      (!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
       rHi20.sym->isGnuIFunc() ||
       (ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section)))
    return;

  // Pick the address the relaxed pcaddi must materialise, according to how
  // the hi20 relocation was classified.
  uint64_t dest = 0;
  if (rHi20.expr == RE_LOONGARCH_PLT_PAGE_PC)
    dest = rHi20.sym->getPltVA(ctx);
  else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
           rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
    dest = rHi20.sym->getVA(ctx);
  else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
    dest = ctx.in.got->getGlobalDynAddr(*rHi20.sym);
  else {
    // The leading space in " in relaxPCHi20Lo12" keeps the symbol name from
    // running into the rest of the message.
    Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
             << rHi20.expr << ") against symbol " << rHi20.sym
             << " in relaxPCHi20Lo12";
    return;
  }
  dest += rHi20.addend;

  const int64_t displace = dest - loc;
  // Bail out unless the displacement is 4-byte aligned and within the range
  // of pcaddi (a signed 22-bit byte offset).
  if ((displace & 0x3) != 0 || !isInt<22>(displace))
    return;

  // Note: If we can ensure that the .o files generated by LLVM only contain
  // relaxable instruction sequences with R_LARCH_RELAX, then we do not need to
  // decode instructions. The relaxable instruction sequences imply the
  // following constraints:
  // * For relocation pairs related to got_pc, the opcodes of instructions
  // must be pcalau12i + ld.w/d. In other cases, the opcodes must be pcalau12i +
  // addi.w/d.
  // * The destination register of pcalau12i is guaranteed to be used only by
  // the immediately following instruction.
  const uint32_t currInsn = read32le(sec.content().data() + rHi20.offset);
  const uint32_t nextInsn = read32le(sec.content().data() + rLo12.offset);
  // Both instructions must use one and the same register: the destination of
  // the first, and both the source and the destination of the second.
  if (getD5(currInsn) != getJ5(nextInsn) || getJ5(nextInsn) != getD5(nextInsn))
    return;

  // Drop the first instruction and turn the second into pcaddi.
  sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
  if (rHi20.type == R_LARCH_TLS_GD_PC_HI20)
    sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2;
  else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
    sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2;
  else
    sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
  sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
  remove = 4;
}

// Relax code sequence.
// From:
//   pcaddu18i $ra, %call36(foo)
//   jirl $ra, $ra, 0
// To:
//   b/bl foo
//
// The link register of the jirl selects the replacement: $ra yields `bl`,
// $zero yields `b`. Any other register leaves the sequence untouched. When
// the sequence is relaxed, the relocation at index `i` is retyped to
// R_LARCH_B26, the new instruction is queued in sec.relaxAux->writes, and
// `remove` is set to 4.
static void relaxCall36(Ctx &ctx, const InputSection &sec, size_t i,
                        uint64_t loc, Relocation &r, uint32_t &remove) {
  const uint64_t symVA =
      r.expr == R_PLT_PC ? r.sym->getPltVA(ctx) : r.sym->getVA(ctx);
  const uint64_t dest = symVA + r.addend;

  // The target must be 4-byte aligned and reachable by b[l] (signed 28-bit
  // byte displacement).
  const int64_t displace = dest - loc;
  const bool reachable = (displace & 0x3) == 0 && isInt<28>(displace);
  if (!reachable)
    return;

  // Inspect the link register of the instruction following the relocated one
  // to decide between bl and b.
  const uint32_t jirlInsn = read32le(sec.content().data() + r.offset + 4);
  const uint32_t linkReg = getD5(jirlInsn);
  uint32_t branchOp;
  if (linkReg == R_RA)
    branchOp = BL; // convert jirl to bl
  else if (linkReg == R_ZERO)
    branchOp = B; // convert jirl to b
  else
    return;

  sec.relaxAux->relocTypes[i] = R_LARCH_B26;
  sec.relaxAux->writes.push_back(insn(branchOp, 0, 0, 0));
  remove = 4;
}

// Relax code sequence.
// From:
//   lu12i.w $rd, %le_hi20_r(sym)
//   add.w/d $rd, $rd, $tp, %le_add_r(sym)
//   addi/ld/st.w/d $rd, $rd, %le_lo12_r(sym)
// To:
//   addi/ld/st.w/d $rd, $tp, %le_lo12_r(sym)
//
// Called once per relocation of the sequence. For the HI20_R and ADD_R
// relocations the instruction is scheduled for removal (`remove` = 4) and the
// relocation is neutralised to R_LARCH_RELAX. For LO12_R the instruction is
// kept, with its J5 register field rewritten to $tp.
static void relaxTlsLe(Ctx &ctx, const InputSection &sec, size_t i,
                       uint64_t loc, Relocation &r, uint32_t &remove) {
  const uint64_t val = r.sym->getVA(ctx, r.addend);
  // Nothing to do unless the value fits the 12-bit signed immediate of
  // addi/ld/st.
  if (!isInt<12>(val))
    return;

  if (r.type == R_LARCH_TLS_LE_HI20_R || r.type == R_LARCH_TLS_LE_ADD_R) {
    sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
    remove = 4;
    return;
  }

  if (r.type == R_LARCH_TLS_LE_LO12_R) {
    const uint32_t lo12Insn = read32le(sec.content().data() + r.offset);
    sec.relaxAux->writes.push_back(setJ5(lo12Insn, R_TP));
    sec.relaxAux->relocTypes[i] = R_LARCH_TLS_LE_LO12_R;
  }
}

static bool relax(Ctx &ctx, InputSection &sec) {
const uint64_t secAddr = sec.getVA();
const MutableArrayRef<Relocation> relocs = sec.relocs();
Expand Down Expand Up @@ -781,6 +943,24 @@ static bool relax(Ctx &ctx, InputSection &sec) {
}
break;
}
case R_LARCH_PCALA_HI20:
case R_LARCH_GOT_PC_HI20:
case R_LARCH_TLS_GD_PC_HI20:
case R_LARCH_TLS_LD_PC_HI20:
// The overflow check for i+2 will be carried out in isPairRelaxable.
if (isPairRelaxable(relocs, i))
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
break;
case R_LARCH_CALL36:
if (relaxable(relocs, i))
relaxCall36(ctx, sec, i, loc, r, remove);
break;
case R_LARCH_TLS_LE_HI20_R:
case R_LARCH_TLS_LE_ADD_R:
case R_LARCH_TLS_LE_LO12_R:
if (relaxable(relocs, i))
relaxTlsLe(ctx, sec, i, loc, r, remove);
break;
}

// For all anchors whose offsets are <= r.offset, they are preceded by
Expand Down Expand Up @@ -851,6 +1031,7 @@ void LoongArch::finalizeRelax(int passes) const {
MutableArrayRef<Relocation> rels = sec->relocs();
ArrayRef<uint8_t> old = sec->content();
size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
size_t writesIdx = 0;
uint8_t *p = ctx.bAlloc.Allocate<uint8_t>(newSize);
uint64_t offset = 0;
int64_t delta = 0;
Expand All @@ -867,11 +1048,46 @@ void LoongArch::finalizeRelax(int passes) const {
continue;

// Copy from last location to the current relocated location.
const Relocation &r = rels[i];
Relocation &r = rels[i];
uint64_t size = r.offset - offset;
memcpy(p, old.data() + offset, size);
p += size;
offset = r.offset + remove;

int64_t skip = 0;
if (RelType newType = aux.relocTypes[i]) {
switch (newType) {
case R_LARCH_RELAX:
break;
case R_LARCH_PCREL20_S2:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
// RelExpr is needed for relocating.
r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC;
break;
case R_LARCH_B26:
case R_LARCH_TLS_LE_LO12_R:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
break;
case R_LARCH_TLS_GD_PCREL20_S2:
// Note: R_LARCH_TLS_LD_PCREL20_S2 must also use R_TLSGD_PC instead
// of R_TLSLD_PC due to historical reasons. In fact, right now TLSLD
// behaves exactly like TLSGD on LoongArch.
//
// This reason has also been mentioned in mold commit:
// https://github.com/rui314/mold/commit/5dfa1cf07c03bd57cb3d493b652ef22441bcd71c
case R_LARCH_TLS_LD_PCREL20_S2:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
r.expr = R_TLSGD_PC;
break;
default:
llvm_unreachable("unsupported type");
}
}

p += skip;
offset = r.offset + skip + remove;
}
memcpy(p, old.data() + offset, old.size() - offset);

Expand Down
Loading