Skip to content

Commit 3093936

Browse files
committed
[lld][LoongArch] Implement TLSDESC GD/LD to IE/LE.
Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new hook RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use the existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, while using the existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE.

Different code models correspond to different TLSDESC code sequences. In the normal or medium code model, there are two forms of code sequence:

```
pcalau12i $a0, %desc_pc_hi20(sym_desc)
addi.d    $a0, $a0, %desc_pc_lo12(sym_desc)
ld.d      $ra, $a0, %desc_ld(sym_desc)
jirl      $ra, $ra, %desc_call(sym_desc)
---
pcaddi    $a0, %desc_pcrel_20(sym_desc)
ld.d      $ra, $a0, %desc_ld(sym_desc)
jirl      $ra, $ra, %desc_call(sym_desc)
```

In the extreme code model, there is only one:

```
pcalau12i $a0, %desc_pc_hi20(sym_desc_large)
addi.d    $a1, $zero, %desc_pc_lo12(sym_desc_large)
lu32i.d   $a1, %desc64_pc_lo20(sym_desc_large)
lu52i.d   $a1, $a1, %desc64_pc_hi12(sym_desc_large)
add.d     $a0, $a0, $a1
ld.d      $ra, $a0, %desc_ld(sym_desc_large)
jirl      $ra, $ra, %desc_call(sym_desc_large)
```

For simplicity, in the normal or medium code model we convert the preceding instructions to NOP, because both forms of the code sequence (corresponding to the relocation combinations `R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12` and `R_LARCH_TLS_DESC_PCREL20_S2`) are then handled by the same process. However, for the extreme code model, the IE optimization requires a temporary register $a1, which already exists in the original preceding code sequence. To avoid additional bookkeeping, in the extreme code model we convert the last two instructions to NOP instead. Fortunately, the extreme code model has only one form (it does not use the `R_LARCH_TLS_DESC_PCREL20_S2` relocation), which makes this conversion strategy feasible.
1 parent afce937 commit 3093936

File tree

4 files changed

+276
-15
lines changed

4 files changed

+276
-15
lines changed

lld/ELF/Arch/LoongArch.cpp

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,15 @@ class LoongArch final : public TargetInfo {
3939
void relocate(uint8_t *loc, const Relocation &rel,
4040
uint64_t val) const override;
4141
bool relaxOnce(int pass) const override;
42+
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
43+
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
4244
void finalizeRelax(int passes) const override;
45+
46+
private:
47+
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val,
48+
bool isExtreme) const;
49+
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val,
50+
bool isExtreme) const;
4351
};
4452
} // end anonymous namespace
4553

@@ -53,16 +61,23 @@ enum Op {
5361
ADDI_W = 0x02800000,
5462
ADDI_D = 0x02c00000,
5563
ANDI = 0x03400000,
64+
ORI = 0x03800000,
65+
LU12I_W = 0x14000000,
66+
LU32I_D = 0x16000000,
67+
LU52I_D = 0x03000000,
68+
PCALAU12I = 0x1a000000,
5669
PCADDU12I = 0x1c000000,
5770
LD_W = 0x28800000,
5871
LD_D = 0x28c00000,
72+
LDX_D = 0x380c0000,
5973
JIRL = 0x4c000000,
6074
};
6175

6276
enum Reg {
6377
R_ZERO = 0,
6478
R_RA = 1,
6579
R_TP = 2,
80+
R_A0 = 4,
6681
R_T0 = 12,
6782
R_T1 = 13,
6883
R_T2 = 14,
@@ -131,6 +146,10 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
131146
return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
132147
}
133148

149+
// Return the 5-bit field held in bits [4:0] of `v`. In this file it is used
// to recover the first register operand slot of an existing instruction word.
static uint32_t getD5(uint64_t v) {
  const uint32_t field = extractBits(v, /*begin=*/4, /*end=*/0);
  return field;
}
150+
151+
// Return the 5-bit field held in bits [9:5] of `v`. In this file it is used
// to recover the second register operand slot of an existing instruction word.
static uint32_t getJ5(uint64_t v) {
  const uint32_t field = extractBits(v, /*begin=*/9, /*end=*/5);
  return field;
}
152+
134153
static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
135154
uint32_t immLo = extractBits(imm, 15, 0);
136155
uint32_t immHi = extractBits(imm, 20, 16);
@@ -812,6 +831,255 @@ static bool relax(Ctx &ctx, InputSection &sec) {
812831
return changed;
813832
}
814833

834+
// Convert TLSDESC GD/LD to IE.
835+
// The code sequence obtained in the normal or medium code model is as follows:
836+
// * pcalau12i $a0, %ie_pc_hi20(sym_ie)
837+
// * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie)
838+
//
839+
// The code sequence obtained in the extreme code model is as follows:
840+
// * pcalau12i $a0, %ie_pc_hi20(sym_ie_large)
841+
// * addi.d $a1, $r0, %ie_pc_lo12(sym_ie_large)
842+
// * lu32i.d $a1, %ie64_pc_lo20(sym_ie_large)
843+
// * lu52i.d $a1, $a1, %ie64_pc_hi12(sym_ie_large)
844+
// * ldx.d $a0, $a0, $a1
845+
//
846+
// The optimization for tlsdescToIe and tlsdescToLe in the extreme code model
847+
// always preserves the preceding code sequence and converts the last
848+
// instructions to NOP, while the optimization for the normal or medium code
849+
// models of tlsdescToIe and tlsdescToLe do the oppsite. This is because, in
850+
// the extreme code model, tlsdescToIe requires a temporary register $a1, and
851+
// the preceding unoptimized tls.desc code sequence contains this temporary
852+
// register, eliminating additional bookkeeping.
853+
void LoongArch::tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val,
854+
bool isExtreme) const {
855+
if (isExtreme) { // extreme
856+
const uint32_t currInsn = read32le(loc);
857+
switch (rel.type) {
858+
case R_LARCH_TLS_DESC_PC_HI20:
859+
write32le(loc, insn(PCALAU12I, R_A0, 0, 0)); // pcalau12i $a0, %ie_pc_hi20
860+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_HI20, val);
861+
break;
862+
case R_LARCH_TLS_DESC_PC_LO12:
863+
write32le(loc, insn(ADDI_D, getD5(currInsn), R_ZERO,
864+
0)); // addi.d $a1, $r0, %ie_pc_lo12
865+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_LO12, val);
866+
break;
867+
case R_LARCH_TLS_DESC64_PC_LO20:
868+
write32le(loc, insn(LU32I_D, getD5(currInsn), 0,
869+
0)); // lu32i.d $a1, %ie64_pc_lo20
870+
relocateNoSym(loc, R_LARCH_TLS_IE64_PC_LO20, val);
871+
break;
872+
case R_LARCH_TLS_DESC64_PC_HI12:
873+
write32le(loc, insn(LU52I_D, getD5(currInsn), getJ5(currInsn),
874+
0)); // lu52i.d $a1, $a1, %ie64_pc_hi12
875+
relocateNoSym(loc, R_LARCH_TLS_IE64_PC_HI12, val);
876+
write32le(loc + 4, insn(LDX_D, R_A0, R_A0,
877+
getD5(currInsn))); // ldx.d $a0, $a0, $a1
878+
break;
879+
case R_LARCH_TLS_DESC_LD:
880+
case R_LARCH_TLS_DESC_CALL:
881+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
882+
break;
883+
default:
884+
llvm_unreachable("unsupported relocation for TLSDESC to IE");
885+
}
886+
} else { // normal or medium
887+
switch (rel.type) {
888+
case R_LARCH_TLS_DESC_PC_HI20:
889+
case R_LARCH_TLS_DESC_PC_LO12:
890+
case R_LARCH_TLS_DESC_PCREL20_S2:
891+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
892+
break;
893+
case R_LARCH_TLS_DESC_LD:
894+
write32le(loc, insn(PCALAU12I, R_A0, 0, 0)); // pcalau12i $a0, %ie_pc_hi20
895+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_HI20, val);
896+
break;
897+
case R_LARCH_TLS_DESC_CALL:
898+
write32le(loc, insn(ctx.arg.is64 ? LD_D : LD_W, R_A0, R_A0,
899+
0)); // ld.[wd] $a0, $a0, %ie_pc_lo12
900+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_LO12, val);
901+
break;
902+
default:
903+
llvm_unreachable("unsupported relocation for TLSDESC to IE");
904+
}
905+
}
906+
}
907+
908+
// Convert TLSDESC GD/LD to LE.
909+
// The code sequence obtained in the normal or medium code model is as follows:
910+
// * lu12i.w $a0, %le_hi20(sym_le) # le_hi20 != 0
911+
// * ori $a0 $a0, %le_lo12(sym_le)
912+
//
913+
// The code sequence obtained in extreme code model is as follows:
914+
// * lu12i.w $a0, %le_hi20(sym_le_large)
915+
// * ori $a0, $a0, %le_lo12(sym_le_large)
916+
// * lu32i.d $a0, %le64_lo20(sym_le_large)
917+
// * lu52i.d $a0, $a0, %le64_hi20(sym_le_large)
918+
//
919+
// Note: In the extreme code model, it is possible for the generated code
920+
// sequence to include NOPs at both beginning and the end. Likely,
921+
// * nop; ori $a0, $r0, %le_lo12; nop; nop
922+
// This occurs because the four instructions are used to assemble each part of a
923+
// 64-bit value independently, without affecting each other. Therefore, to
924+
// obtain an efficient code sequence, NOPs are used as much as possible.
925+
// Additionally, the extreme code model does not participate in relaxation
926+
// optimization.
927+
void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val,
928+
bool isExtreme) const {
929+
if (isExtreme) { // extreme
930+
switch (rel.type) {
931+
case R_LARCH_TLS_DESC_PC_HI20:
932+
if (uint32_t hi20 = extractBits(val, 31, 12))
933+
write32le(loc,
934+
insn(LU12I_W, R_A0, hi20, 0)); // lu12i.w $a0, $a0, %le_hi20
935+
else
936+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
937+
break;
938+
case R_LARCH_TLS_DESC_PC_LO12:
939+
if (extractBits(val, 31, 12))
940+
write32le(loc,
941+
insn(ORI, R_A0, R_A0, lo12(val))); // ori $a0, $a0, %le_lo12
942+
else
943+
write32le(loc,
944+
insn(ORI, R_A0, R_ZERO, lo12(val))); // ori $a0, $r0, %le_lo12
945+
break;
946+
case R_LARCH_TLS_DESC64_PC_LO20:
947+
// If val[31] is 1, lu12i.w will set $a0[51-32]. So, clear it.
948+
if (uint32_t lo20 = extractBits(val, 51, 32) || extractBits(val, 31, 31))
949+
write32le(loc, insn(LU32I_D, R_A0, lo20, 0)); // lu32i.d $a0, %le64_lo20
950+
else
951+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
952+
break;
953+
case R_LARCH_TLS_DESC64_PC_HI12:
954+
// If val[31] is 1, lu12i.w will set $a0[63-52]. So, clear it.
955+
if (uint32_t hi12 =
956+
extractBits(val, 63, 52) || extractBits(val, 31, 31)) {
957+
write32le(loc, insn(LU52I_D, R_A0, R_A0,
958+
hi12)); // lu52i.d $a0, $a0, %le64_hi20
959+
// Due to add.d does not include relocation, an additional NOP needs to
960+
// be generated.
961+
write32le(loc + 4, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
962+
} else {
963+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
964+
write32le(loc + 4, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
965+
}
966+
break;
967+
case R_LARCH_TLS_DESC_LD:
968+
case R_LARCH_TLS_DESC_CALL:
969+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
970+
break;
971+
default:
972+
llvm_unreachable("unsupported relocation for TLSDESC to LE");
973+
}
974+
} else { // normal or medium
975+
switch (rel.type) {
976+
case R_LARCH_TLS_DESC_PC_HI20:
977+
case R_LARCH_TLS_DESC_PC_LO12:
978+
case R_LARCH_TLS_DESC_PCREL20_S2:
979+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
980+
break;
981+
case R_LARCH_TLS_DESC_LD:
982+
if (isUInt<12>(val))
983+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
984+
else if (isInt<32>(val))
985+
write32le(loc, insn(LU12I_W, R_A0, extractBits(val, 31, 12),
986+
0)); // lu12i.w $a0, %le_hi20
987+
else
988+
Err(ctx) << val
989+
<< " exceeds the range of medium code model in tlsdescToLe";
990+
break;
991+
case R_LARCH_TLS_DESC_CALL:
992+
if (isUInt<12>(val))
993+
write32le(loc, insn(ORI, R_A0, R_ZERO, val)); // ori $a0, $r0, %le_lo12
994+
else if (isInt<32>(val))
995+
write32le(loc,
996+
insn(ORI, R_A0, R_A0, lo12(val))); // ori $a0, $a0, %le_lo12
997+
else
998+
Err(ctx) << val
999+
<< " exceeds the range of medium code model in tlsdescToLe";
1000+
break;
1001+
default:
1002+
llvm_unreachable("unsupported relocation for TLSDESC to LE");
1003+
}
1004+
}
1005+
}
1006+
1007+
// During GD_TO_IE, the converted code sequence always includes an instruction
1008+
// related to the Lo12 relocation (ld.[wd] or addi.d). To obtain correct val in
1009+
// `getRelocTargetVA`, expr of this instruction should be adjusted to
1010+
// R_RELAX_TLS_GD_TO_IE_ABS, while expr of other valid instructions (not NOP)
1011+
// should be adjusted to RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC.
1012+
// See the comment in tlsdescToIe for detailed information.
1013+
//
1014+
// Specifically, in the normal or medium code model, the instruction with
1015+
// relocation R_LARCH_TLS_DESC_CALL is the candidate of Lo12 relocation. And in
1016+
// the extreme code model, the instruction with R_LARCH_TLS_DESC_PC_LO12 is the
1017+
// candidate. Meanwhile, in the normal or medium code model, the instruction
1018+
// with R_LARCH_TLS_DESC_PC_LO12 will always be converted to NOP. Similarly, in
1019+
// the extreme code model, the instruction with R_LARCH_TLS_DESC_CALL will be
1020+
// converted to NOP. Therefore, the adjustment of the expr here is safe.
1021+
RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
1022+
if (expr == R_RELAX_TLS_GD_TO_IE) {
1023+
if (type != R_LARCH_TLS_DESC_PC_LO12 && type != R_LARCH_TLS_DESC_CALL)
1024+
return RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC;
1025+
return R_RELAX_TLS_GD_TO_IE_ABS;
1026+
}
1027+
return expr;
1028+
}
1029+
1030+
// Apply every relocation recorded for `sec` to the section contents in `buf`.
//
// Relocations marked for TLSDESC relaxation are dispatched to tlsdescToIe or
// tlsdescToLe; R_RELAX_HINT entries are skipped; everything else goes through
// the regular relocate(). The code model of a TLSDESC sequence is detected when
// its R_LARCH_TLS_DESC_PC_HI20 relocation is seen and is remembered for the
// relocations that follow.
void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  const unsigned bits = ctx.arg.is64 ? 64 : 32;

  // Address of this input section within the output image.
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *isec = dyn_cast<InputSection>(&sec))
    secAddr += isec->outSecOff;
  else if (auto *eh = dyn_cast<EhInputSection>(&sec))
    secAddr += eh->getParent()->outSecOff;

  const ArrayRef<Relocation> relocs = sec.relocs();
  const size_t numRelocs = relocs.size();

  // The relocation sequence in the extreme code model is as follows:
  //
  //  * idx   -- R_LARCH_TLS_DESC_PC_HI20
  //  * idx+1 -- R_LARCH_TLS_DESC_PC_LO12
  //  * idx+2 -- R_LARCH_TLS_DESC64_PC_LO20
  //  * idx+3 -- R_LARCH_TLS_DESC64_PC_HI12
  //
  // so a HI20 at `idx` starts an extreme sequence iff idx+2 is DESC64_PC_LO20.
  auto startsExtremeSeq = [&](size_t idx) {
    return idx + 2 < numRelocs &&
           relocs[idx + 2].type == R_LARCH_TLS_DESC64_PC_LO20;
  };

  // Only updated on R_LARCH_TLS_DESC_PC_HI20; later relocations reuse the
  // value that was last recorded.
  bool isExtreme = false;
  for (size_t idx = 0; idx != numRelocs; ++idx) {
    const Relocation &rel = relocs[idx];
    uint8_t *loc = buf + rel.offset;
    const uint64_t val = SignExtend64(
        sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits);

    if (rel.expr == R_RELAX_HINT)
      continue;

    const bool isDescHi20 = rel.type == R_LARCH_TLS_DESC_PC_HI20;

    if (rel.expr == RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC ||
        rel.expr == R_RELAX_TLS_GD_TO_IE_ABS) {
      if (rel.expr == RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC && isDescHi20)
        isExtreme = startsExtremeSeq(idx);
      tlsdescToIe(loc, rel, val, isExtreme);
      continue;
    }

    if (rel.expr == R_RELAX_TLS_GD_TO_LE) {
      if (isDescHi20)
        isExtreme = startsExtremeSeq(idx);
      tlsdescToLe(loc, rel, val, isExtreme);
      continue;
    }

    relocate(loc, rel, val);
  }
}
1082+
8151083
// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
8161084
// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
8171085
// shrinkage may reduce displacement and make more relocations eligible for

lld/ELF/InputSection.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
825825
case R_GOTPLT_PC:
826826
return r.sym->getGotPltVA(ctx) + a - p;
827827
case RE_LOONGARCH_GOT_PAGE_PC:
828+
case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
828829
if (r.sym->hasFlag(NEEDS_TLSGD))
829830
return getLoongArchPageDelta(ctx.in.got->getGlobalDynAddr(*r.sym) + a, p,
830831
r.type);

lld/ELF/Relocations.cpp

Lines changed: 6 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,22 +1316,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
13161316
if (ctx.arg.emachine == EM_MIPS)
13171317
return handleMipsTlsRelocation(ctx, type, sym, *sec, offset, addend, expr);
13181318

1319-
// LoongArch does not yet implement transition from TLSDESC to LE/IE, so
1320-
// generate TLSDESC dynamic relocation for the dynamic linker to handle.
1321-
if (ctx.arg.emachine == EM_LOONGARCH &&
1322-
oneof<RE_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
1323-
R_TLSDESC_CALL>(expr)) {
1324-
if (expr != R_TLSDESC_CALL) {
1325-
sym.setFlags(NEEDS_TLSDESC);
1326-
sec->addReloc({expr, type, offset, addend, &sym});
1327-
}
1328-
return 1;
1329-
}
1330-
13311319
bool isRISCV = ctx.arg.emachine == EM_RISCV;
13321320

13331321
if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
1334-
R_TLSDESC_GOTPLT>(expr) &&
1322+
R_TLSDESC_GOTPLT, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr) &&
13351323
ctx.arg.shared) {
13361324
// R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not
13371325
// set NEEDS_TLSDESC on the label.
@@ -1350,7 +1338,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
13501338
// optimization as well.
13511339
bool execOptimize =
13521340
!ctx.arg.shared && ctx.arg.emachine != EM_ARM &&
1353-
ctx.arg.emachine != EM_HEXAGON && ctx.arg.emachine != EM_LOONGARCH &&
1341+
ctx.arg.emachine != EM_HEXAGON &&
1342+
!(ctx.arg.emachine == EM_LOONGARCH &&
1343+
expr != RE_LOONGARCH_TLSDESC_PAGE_PC && expr != R_TLSDESC &&
1344+
expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) &&
13541345
!(isRISCV && expr != R_TLSDESC_PC && expr != R_TLSDESC_CALL) &&
13551346
!sec->file->ppc64DisableTLSRelax;
13561347

@@ -1401,7 +1392,7 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
14011392

14021393
if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
14031394
R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC,
1404-
RE_LOONGARCH_TLSGD_PAGE_PC>(expr)) {
1395+
RE_LOONGARCH_TLSGD_PAGE_PC, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr)) {
14051396
if (!execOptimize) {
14061397
sym.setFlags(NEEDS_TLSGD);
14071398
sec->addReloc({expr, type, offset, addend, &sym});

lld/ELF/Relocations.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,7 @@ enum RelExpr {
128128
RE_LOONGARCH_GOT,
129129
RE_LOONGARCH_GOT_PAGE_PC,
130130
RE_LOONGARCH_TLSGD_PAGE_PC,
131+
RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC,
131132
RE_LOONGARCH_TLSDESC_PAGE_PC,
132133
};
133134

0 commit comments

Comments
 (0)