Skip to content
112 changes: 110 additions & 2 deletions lld/ELF/Arch/LoongArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class LoongArch final : public TargetInfo {
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
bool relaxOnce(int pass) const override;
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
void finalizeRelax(int passes) const override;
};
} // end anonymous namespace
Expand All @@ -53,6 +54,8 @@ enum Op {
ADDI_W = 0x02800000,
ADDI_D = 0x02c00000,
ANDI = 0x03400000,
ORI = 0x03800000,
LU12I_W = 0x14000000,
PCADDI = 0x18000000,
PCADDU12I = 0x1c000000,
LD_W = 0x28800000,
Expand Down Expand Up @@ -766,9 +769,12 @@ static bool isPairRelaxable(ArrayRef<Relocation> relocs, size_t i) {
// Relax code sequence.
// From:
// pcalau12i $a0, %pc_hi20(sym) | %ld_pc_hi20(sym) | %gd_pc_hi20(sym)
// | %desc_pc_hi20(sym)
// addi.w/d $a0, $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
// | %desc_pc_lo12(sym)
// To:
// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
// pcaddi $a0, %pc_lo12(sym) | %got_pc_lo12(sym) | %got_pc_lo12(sym)
// | %desc_pcrel_20(sym)
//
// From:
// pcalau12i $a0, %got_pc_hi20(sym_got)
Expand All @@ -786,7 +792,9 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
(rHi20.type == R_LARCH_TLS_GD_PC_HI20 &&
rLo12.type == R_LARCH_GOT_PC_LO12) ||
(rHi20.type == R_LARCH_TLS_LD_PC_HI20 &&
rLo12.type == R_LARCH_GOT_PC_LO12)))
rLo12.type == R_LARCH_GOT_PC_LO12) ||
(rHi20.type == R_LARCH_TLS_DESC_PC_HI20 &&
rLo12.type == R_LARCH_TLS_DESC_PC_LO12)))
return;

// GOT references to absolute symbols can't be relaxed to use pcaddi in
Expand All @@ -808,6 +816,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
dest = rHi20.sym->getVA(ctx);
else if (rHi20.expr == RE_LOONGARCH_TLSGD_PAGE_PC)
dest = ctx.in.got->getGlobalDynAddr(*rHi20.sym);
else if (rHi20.expr == RE_LOONGARCH_TLSDESC_PAGE_PC)
dest = ctx.in.got->getTlsDescAddr(*rHi20.sym);
else {
Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
<< rHi20.expr << ") against symbol " << rHi20.sym
Expand Down Expand Up @@ -841,6 +851,8 @@ static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_GD_PCREL20_S2;
else if (rHi20.type == R_LARCH_TLS_LD_PC_HI20)
sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_LD_PCREL20_S2;
else if (rHi20.type == R_LARCH_TLS_DESC_PC_HI20)
sec.relaxAux->relocTypes[i + 2] = R_LARCH_TLS_DESC_PCREL20_S2;
else
sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
Expand Down Expand Up @@ -947,6 +959,7 @@ static bool relax(Ctx &ctx, InputSection &sec) {
case R_LARCH_GOT_PC_HI20:
case R_LARCH_TLS_GD_PC_HI20:
case R_LARCH_TLS_LD_PC_HI20:
case R_LARCH_TLS_DESC_PC_HI20:
// The overflow check for i+2 will be carried out in isPairRelaxable.
if (isPairRelaxable(relocs, i))
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
Expand All @@ -961,6 +974,11 @@ static bool relax(Ctx &ctx, InputSection &sec) {
if (relaxable(relocs, i))
relaxTlsLe(ctx, sec, i, loc, r, remove);
break;
case R_LARCH_TLS_IE_PC_HI20:
if (relaxable(relocs, i) && r.expr == R_RELAX_TLS_IE_TO_LE &&
isUInt<12>(r.sym->getVA(ctx, r.addend)))
remove = 4;
break;
}

// For all anchors whose offsets are <= r.offset, they are preceded by
Expand Down Expand Up @@ -992,6 +1010,91 @@ static bool relax(Ctx &ctx, InputSection &sec) {
return changed;
}

// Convert TLS IE to LE in the normal or medium code model.
// Original code sequence:
// * pcalau12i $a0, %ie_pc_hi20(sym)
// * ld.d $a0, $a0, %ie_pc_lo12(sym)
//
// The code sequence converted is as follows:
// * lu12i.w $a0, %le_hi20(sym) # le_hi20 != 0, otherwise NOP
// * ori $a0, src, %le_lo12(sym) # le_hi20 != 0, src = $a0,
// # otherwise, src = $zero
//
// When relaxation enables, redundant NOPs can be removed.
static void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
assert(isInt<32>(val) &&
"val exceeds the range of medium code model in tlsIeToLe");

bool isUInt12 = isUInt<12>(val);
const uint32_t currInsn = read32le(loc);
switch (rel.type) {
case R_LARCH_TLS_IE_PC_HI20:
if (isUInt12)
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
else
write32le(loc, insn(LU12I_W, getD5(currInsn), extractBits(val, 31, 12),
0)); // lu12i.w $a0, %le_hi20
break;
case R_LARCH_TLS_IE_PC_LO12:
if (isUInt12)
write32le(loc, insn(ORI, getD5(currInsn), R_ZERO,
val)); // ori $a0, $zero, %le_lo12
else
write32le(loc, insn(ORI, getD5(currInsn), getJ5(currInsn),
lo12(val))); // ori $a0, $a0, %le_lo12
break;
}
}

// Apply every relocation of `sec` to its output bytes in `buf`.
//
// Relaxation hints are skipped, relocations marked R_RELAX_TLS_IE_TO_LE are
// either rewritten to the LE form or (extreme code model) relocated as plain
// IE, and everything else is forwarded to relocate().
void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  // Width used to sign-extend computed relocation values.
  const unsigned bits = ctx.arg.is64 ? 64 : 32;

  // Address of the start of this input section in the output.
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *isec = dyn_cast<InputSection>(&sec))
    secAddr += isec->outSecOff;
  else if (auto *eh = dyn_cast<EhInputSection>(&sec))
    secAddr += eh->getParent()->outSecOff;

  // Decided at each R_LARCH_TLS_IE_PC_HI20 and then reused for the
  // R_RELAX_TLS_IE_TO_LE relocations that follow it.
  bool isExtreme = false;
  const MutableArrayRef<Relocation> relocs = sec.relocs();
  const size_t numRelocs = relocs.size();
  for (size_t i = 0; i != numRelocs; ++i) {
    Relocation &rel = relocs[i];
    uint8_t *loc = buf + rel.offset;
    const uint64_t place = secAddr + rel.offset;
    uint64_t val = SignExtend64(sec.getRelocTargetVA(ctx, rel, place), bits);

    if (rel.expr == R_RELAX_HINT)
      continue;

    if (rel.expr != R_RELAX_TLS_IE_TO_LE) {
      relocate(loc, rel, val);
      continue;
    }

    const bool isIeHi20 = rel.type == R_LARCH_TLS_IE_PC_HI20;
    if (isIeHi20) {
      // LoongArch does not support IE to LE optimization in the extreme code
      // model. In this case, the relocs are as follows:
      //
      //   * i   -- R_LARCH_TLS_IE_PC_HI20
      //   * i+1 -- R_LARCH_TLS_IE_PC_LO12
      //   * i+2 -- R_LARCH_TLS_IE64_PC_LO20
      //   * i+3 -- R_LARCH_TLS_IE64_PC_HI12
      isExtreme = i + 2 < numRelocs &&
                  relocs[i + 2].type == R_LARCH_TLS_IE64_PC_LO20;
    }

    if (isExtreme) {
      // Undo the IE->LE marking: restore the relocation's regular expression,
      // recompute the value under it and apply it as an ordinary relocation.
      rel.expr = getRelExpr(rel.type, *rel.sym, loc);
      val = SignExtend64(sec.getRelocTargetVA(ctx, rel, place), bits);
      relocateNoSym(loc, rel.type, val);
      continue;
    }

    // A relaxable HI20 whose value fits in 12 bits is the instruction the
    // relaxation pass removes, so there is nothing to patch for it.
    if (relaxable(relocs, i) && isIeHi20 && isUInt<12>(val))
      continue;
    tlsIeToLe(loc, rel, val);
  }
}

// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
// shrinkage may reduce displacement and make more relocations eligible for
Expand Down Expand Up @@ -1081,6 +1184,11 @@ void LoongArch::finalizeRelax(int passes) const {
write32le(p, aux.writes[writesIdx++]);
r.expr = R_TLSGD_PC;
break;
case R_LARCH_TLS_DESC_PCREL20_S2:
skip = 4;
write32le(p, aux.writes[writesIdx++]);
r.expr = R_TLSDESC_PC;
break;
default:
llvm_unreachable("unsupported type");
}
Expand Down
17 changes: 16 additions & 1 deletion lld/ELF/Relocations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1375,14 +1375,20 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
return 1;
}

// LoongArch supports IE to LE optimization in non-extreme code model.
bool execOptimizeInLoongArch =
ctx.arg.emachine == EM_LOONGARCH &&
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12);

// ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
// optimizations.
// RISC-V supports TLSDESC to IE/LE optimizations.
// For PPC64, if the file has missing R_PPC64_TLSGD/R_PPC64_TLSLD, disable
// optimization as well.
bool execOptimize =
!ctx.arg.shared && ctx.arg.emachine != EM_ARM &&
ctx.arg.emachine != EM_HEXAGON && ctx.arg.emachine != EM_LOONGARCH &&
ctx.arg.emachine != EM_HEXAGON &&
(ctx.arg.emachine != EM_LOONGARCH || execOptimizeInLoongArch) &&
!(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) &&
!sec->file->ppc64DisableTLSRelax;

Expand Down Expand Up @@ -1476,6 +1482,15 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
return 1;
}

// LoongArch TLS GD/LD relocs reuse the RE_LOONGARCH_GOT, in which
// NEEDS_TLSIE shouldn't be set. So we check independently.
if (ctx.arg.emachine == EM_LOONGARCH && expr == RE_LOONGARCH_GOT &&
execOptimize && isLocalInExecutable) {
ctx.hasTlsIe.store(true, std::memory_order_relaxed);
sec->addReloc({R_RELAX_TLS_IE_TO_LE, type, offset, addend, &sym});
return 1;
}

return 0;
}

Expand Down
92 changes: 92 additions & 0 deletions lld/test/ELF/loongarch-relax-tls-ie.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# REQUIRES: loongarch
## Test LA64 IE -> LE in various cases.

# RUN: llvm-mc --filetype=obj --triple=loongarch64 -mattr=+relax %s -o %t.o

## Also check --emit-relocs.
# RUN: ld.lld --emit-relocs %t.o -o %t
# RUN: llvm-readelf -x .got %t 2>&1 | FileCheck --check-prefix=LE-GOT %s
# RUN: llvm-objdump -dr --no-show-raw-insn %t | FileCheck --check-prefixes=LER %s

# RUN: ld.lld --emit-relocs --no-relax %t.o -o %t.norelax
# RUN: llvm-readelf -x .got %t.norelax 2>&1 | FileCheck --check-prefix=LE-GOT %s
# RUN: llvm-objdump -dr --no-show-raw-insn %t.norelax | FileCheck --check-prefixes=LE %s

# LE-GOT: could not find section '.got'

# a@tprel = st_value(a) = 0xfff
# b@tprel = st_value(b) = 0x1000
# LE: 20158: nop
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 a
# LE-NEXT: R_LARCH_RELAX *ABS*
# LE-NEXT: ori $a0, $zero, 4095
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 a
# LE-NEXT: R_LARCH_RELAX *ABS*
# LE-NEXT: add.d $a0, $a0, $tp
# LE-NEXT: 20164: lu12i.w $a1, 1
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 b
# LE-NEXT: ori $a1, $a1, 0
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 b
# LE-NEXT: add.d $a1, $a1, $tp
# LE-NEXT: 20170: nop
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 a
# LE-NEXT: R_LARCH_RELAX *ABS*
# LE-NEXT: lu12i.w $a3, 1
# LE-NEXT: R_LARCH_TLS_IE_PC_HI20 b
# LE-NEXT: R_LARCH_RELAX *ABS*
# LE-NEXT: ori $a2, $zero, 4095
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 a
# LE-NEXT: ori $a3, $a3, 0
# LE-NEXT: R_LARCH_TLS_IE_PC_LO12 b
# LE-NEXT: add.d $a2, $a2, $tp
# LE-NEXT: add.d $a3, $a3, $tp

# LER: 20158: ori $a0, $zero, 4095
# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 a
# LER-NEXT: R_LARCH_RELAX *ABS*
# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 a
# LER-NEXT: R_LARCH_RELAX *ABS*
# LER-NEXT: add.d $a0, $a0, $tp
# LER-NEXT: 20160: lu12i.w $a1, 1
# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 b
# LER-NEXT: ori $a1, $a1, 0
# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 b
# LER-NEXT: add.d $a1, $a1, $tp
# LER-NEXT: 2016c: lu12i.w $a3, 1
# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 a
# LER-NEXT: R_LARCH_RELAX *ABS*
# LER-NEXT: R_LARCH_TLS_IE_PC_HI20 b
# LER-NEXT: R_LARCH_RELAX *ABS*
# LER-NEXT: ori $a2, $zero, 4095
# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 a
# LER-NEXT: ori $a3, $a3, 0
# LER-NEXT: R_LARCH_TLS_IE_PC_LO12 b
# LER-NEXT: add.d $a2, $a2, $tp
# LER-NEXT: add.d $a3, $a3, $tp

la.tls.ie $a0, a # relax
add.d $a0, $a0, $tp

# PCALAU12I does not have R_LARCH_RELAX. No relaxation.
pcalau12i $a1, %ie_pc_hi20(b)
ld.d $a1, $a1, %ie_pc_lo12(b)
add.d $a1, $a1, $tp

# Test instructions are interleaved.
# PCALAU12I has an R_LARCH_RELAX. We perform relaxation.
pcalau12i $a2, %ie_pc_hi20(a)
.reloc .-4, R_LARCH_RELAX, 0
pcalau12i $a3, %ie_pc_hi20(b)
.reloc .-4, R_LARCH_RELAX, 0
ld.d $a2, $a2, %ie_pc_lo12(a)
ld.d $a3, $a3, %ie_pc_lo12(b)
add.d $a2, $a2, $tp
add.d $a3, $a3, $tp

.section .tbss,"awT",@nobits
.globl a
.zero 0xfff ## Place a at 0xfff, LE needs only one ins.
a:
.zero 1 ## Place b at 0x1000, LE needs two ins.
b:
.zero 4
Loading