Skip to content

Commit 4b91af1

Browse files
committed
[lld][LoongArch] Implement TLSDESC GD/LD to IE/LE.
Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new relocation expression RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use the existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, and use the existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. In the normal or medium code model, there are two forms of code sequences: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) ------ * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) The code sequence obtained is as follows: * pcalau12i $a0, %ie_pc_hi20(sym_ie) * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) For simplicity, in both tlsdescToIe and tlsdescToLe we always convert the preceding instructions to NOPs, because both forms of the code sequence (corresponding to the relocation combinations R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and R_LARCH_TLS_DESC_PCREL20_S2) are then processed in the same way. FIXME: When relaxation is enabled, the redundant NOPs can be removed. This will be implemented in a future patch. Note: The different forms of TLSDESC code sequences should not appear interleaved in the normal, medium or extreme code model; compilers do not generate such code and lld does not support it. This is thanks to the guard in PostRASchedulerList.cpp in llvm. ``` Calls are not scheduling boundaries before register allocation, but post-ra we don't gain anything by scheduling across calls since we don't need to worry about register pressure. ```
1 parent 8a351f1 commit 4b91af1

File tree

4 files changed

+171
-17
lines changed

4 files changed

+171
-17
lines changed

lld/ELF/Arch/LoongArch.cpp

Lines changed: 147 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,13 @@ class LoongArch final : public TargetInfo {
3939
void relocate(uint8_t *loc, const Relocation &rel,
4040
uint64_t val) const override;
4141
bool relaxOnce(int pass) const override;
42+
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
4243
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
4344
void finalizeRelax(int passes) const override;
45+
46+
private:
47+
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
48+
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
4449
};
4550
} // end anonymous namespace
4651

@@ -58,6 +63,7 @@ enum Op {
5863
LU12I_W = 0x14000000,
5964
PCADDI = 0x18000000,
6065
PCADDU12I = 0x1c000000,
66+
PCALAU12I = 0x1a000000,
6167
LD_W = 0x28800000,
6268
LD_D = 0x28c00000,
6369
JIRL = 0x4c000000,
@@ -69,6 +75,7 @@ enum Reg {
6975
R_ZERO = 0,
7076
R_RA = 1,
7177
R_TP = 2,
78+
R_A0 = 4,
7279
R_T0 = 12,
7380
R_T1 = 13,
7481
R_T2 = 14,
@@ -961,7 +968,8 @@ static bool relax(Ctx &ctx, InputSection &sec) {
961968
case R_LARCH_TLS_LD_PC_HI20:
962969
case R_LARCH_TLS_DESC_PC_HI20:
963970
// The overflow check for i+2 will be carried out in isPairRelaxable.
964-
if (isPairRelaxable(relocs, i))
971+
if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC &&
972+
r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i))
965973
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
966974
break;
967975
case R_LARCH_CALL36:
@@ -1046,6 +1054,103 @@ static void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {
10461054
}
10471055
}
10481056

1057+
// Convert TLSDESC GD/LD to IE.
1058+
// In normal or medium code model, there are two forms of code sequences:
1059+
// * pcalau12i $a0, %desc_pc_hi20(sym_desc)
1060+
// * addi.d $a0, $a0, %desc_pc_lo12(sym_desc)
1061+
// * ld.d $ra, $a0, %desc_ld(sym_desc)
1062+
// * jirl $ra, $ra, %desc_call(sym_desc)
1063+
// ------
1064+
// * pcaddi $a0, %desc_pcrel_20(a)
1065+
// * load $ra, $a0, %desc_ld(a)
1066+
// * jirl $ra, $ra, %desc_call(a)
1067+
//
1068+
// The code sequence obtained is as follows:
1069+
// * pcalau12i $a0, %ie_pc_hi20(sym_ie)
1070+
// * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie)
1071+
//
1072+
// Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the
1073+
// preceding instructions to NOPs, due to both forms of code sequence
1074+
// (corresponding to relocation combinations:
1075+
// R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and
1076+
// R_LARCH_TLS_DESC_PCREL20_S2) have same process.
1077+
//
1078+
// When relaxation enables, redundant NOPs can be removed.
1079+
void LoongArch::tlsdescToIe(uint8_t *loc, const Relocation &rel,
1080+
uint64_t val) const {
1081+
switch (rel.type) {
1082+
case R_LARCH_TLS_DESC_PC_HI20:
1083+
case R_LARCH_TLS_DESC_PC_LO12:
1084+
case R_LARCH_TLS_DESC_PCREL20_S2:
1085+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1086+
break;
1087+
case R_LARCH_TLS_DESC_LD:
1088+
write32le(loc, insn(PCALAU12I, R_A0, 0, 0)); // pcalau12i $a0, %ie_pc_hi20
1089+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_HI20, val);
1090+
break;
1091+
case R_LARCH_TLS_DESC_CALL:
1092+
write32le(loc, insn(ctx.arg.is64 ? LD_D : LD_W, R_A0, R_A0,
1093+
0)); // ld.[wd] $a0, $a0, %ie_pc_lo12
1094+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_LO12, val);
1095+
break;
1096+
default:
1097+
llvm_unreachable("unsupported relocation for TLSDESC to IE");
1098+
}
1099+
}
1100+
1101+
// Convert TLSDESC GD/LD to LE.
1102+
// The code sequence obtained in the normal or medium code model is as follows:
1103+
// * lu12i.w $a0, %le_hi20(sym_le) # le_hi20 != 0
1104+
// * ori $a0 $a0, %le_lo12(sym_le)
1105+
// See the comment in tlsdescToIe for detailed information.
1106+
void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
1107+
uint64_t val) const {
1108+
assert(isInt<32>(val) &&
1109+
"val exceeds the range of medium code model in tlsdescToLe");
1110+
1111+
bool isUInt12 = isUInt<12>(val);
1112+
switch (rel.type) {
1113+
case R_LARCH_TLS_DESC_PC_HI20:
1114+
case R_LARCH_TLS_DESC_PC_LO12:
1115+
case R_LARCH_TLS_DESC_PCREL20_S2:
1116+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1117+
break;
1118+
case R_LARCH_TLS_DESC_LD:
1119+
if (isUInt12)
1120+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1121+
else
1122+
write32le(loc, insn(LU12I_W, R_A0, extractBits(val, 31, 12),
1123+
0)); // lu12i.w $a0, %le_hi20
1124+
break;
1125+
case R_LARCH_TLS_DESC_CALL:
1126+
if (isUInt12)
1127+
write32le(loc, insn(ORI, R_A0, R_ZERO, val)); // ori $a0, $r0, %le_lo12
1128+
else
1129+
write32le(loc,
1130+
insn(ORI, R_A0, R_A0, lo12(val))); // ori $a0, $a0, %le_lo12
1131+
break;
1132+
default:
1133+
llvm_unreachable("unsupported relocation for TLSDESC to LE");
1134+
}
1135+
}
1136+
1137+
// During TLSDESC GD_TO_IE, the converted code sequence always includes an
1138+
// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
1139+
// in `getRelocTargetVA`, expr of this instruction should be adjusted to
1140+
// R_RELAX_TLS_GD_TO_IE_ABS, while expr of other instructions related to the
1141+
// Hi20 relocation (pcalau12i) should be adjusted to
1142+
// RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC. Specifically, in the normal or
1143+
// medium code model, the instruction with relocation R_LARCH_TLS_DESC_CALL is
1144+
// the candidate of Lo12 relocation.
1145+
RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
1146+
if (expr == R_RELAX_TLS_GD_TO_IE) {
1147+
if (type != R_LARCH_TLS_DESC_CALL)
1148+
return RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC;
1149+
return R_RELAX_TLS_GD_TO_IE_ABS;
1150+
}
1151+
return expr;
1152+
}
1153+
10491154
void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
10501155
const unsigned bits = ctx.arg.is64 ? 64 : 32;
10511156
uint64_t secAddr = sec.getOutputSection()->addr;
@@ -1088,6 +1193,47 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
10881193
tlsIeToLe(loc, rel, val);
10891194
}
10901195
continue;
1196+
case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
1197+
if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
1198+
// LoongArch does not support TLSDESC GD/LD to LE/IE optimization in the
1199+
// extreme code model. In these cases, the relocs are as follows:
1200+
//
1201+
// * i -- R_LARCH_TLS_DESC_PC_HI20
1202+
// * i+1 -- R_LARCH_TLS_DESC_PC_LO12
1203+
// * i+2 -- R_LARCH_TLS_DESC64_PC_LO20
1204+
// * i+3 -- R_LARCH_TLS_DESC64_PC_HI12
1205+
isExtreme =
1206+
(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20);
1207+
}
1208+
[[fallthrough]];
1209+
case R_RELAX_TLS_GD_TO_IE_ABS:
1210+
if (isExtreme) {
1211+
if (rel.type == R_LARCH_TLS_DESC_CALL)
1212+
continue;
1213+
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
1214+
val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
1215+
bits);
1216+
relocateNoSym(loc, rel.type, val);
1217+
} else {
1218+
tlsdescToIe(loc, rel, val);
1219+
}
1220+
continue;
1221+
case R_RELAX_TLS_GD_TO_LE:
1222+
if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
1223+
isExtreme =
1224+
(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20);
1225+
}
1226+
if (isExtreme) {
1227+
if (rel.type == R_LARCH_TLS_DESC_CALL)
1228+
continue;
1229+
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
1230+
val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
1231+
bits);
1232+
relocateNoSym(loc, rel.type, val);
1233+
} else {
1234+
tlsdescToLe(loc, rel, val);
1235+
}
1236+
continue;
10911237
default:
10921238
break;
10931239
}

lld/ELF/InputSection.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -831,6 +831,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
831831
case R_GOTPLT_PC:
832832
return r.sym->getGotPltVA(ctx) + a - p;
833833
case RE_LOONGARCH_GOT_PAGE_PC:
834+
case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
834835
if (r.sym->hasFlag(NEEDS_TLSGD))
835836
return getLoongArchPageDelta(ctx.in.got->getGlobalDynAddr(*r.sym) + a, p,
836837
r.type);

lld/ELF/Relocations.cpp

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1346,22 +1346,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
13461346
if (ctx.arg.emachine == EM_MIPS)
13471347
return handleMipsTlsRelocation(ctx, type, sym, *sec, offset, addend, expr);
13481348

1349-
// LoongArch does not yet implement transition from TLSDESC to LE/IE, so
1350-
// generate TLSDESC dynamic relocation for the dynamic linker to handle.
1351-
if (ctx.arg.emachine == EM_LOONGARCH &&
1352-
oneof<RE_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
1353-
R_TLSDESC_CALL>(expr)) {
1354-
if (expr != R_TLSDESC_CALL) {
1355-
sym.setFlags(NEEDS_TLSDESC);
1356-
sec->addReloc({expr, type, offset, addend, &sym});
1357-
}
1358-
return 1;
1359-
}
1360-
13611349
bool isRISCV = ctx.arg.emachine == EM_RISCV;
13621350

13631351
if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
1364-
R_TLSDESC_GOTPLT>(expr) &&
1352+
R_TLSDESC_GOTPLT, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr) &&
13651353
ctx.arg.shared) {
13661354
// R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not
13671355
// set NEEDS_TLSDESC on the label.
@@ -1375,10 +1363,14 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
13751363
return 1;
13761364
}
13771365

1378-
// LoongArch supports IE to LE optimization in non-extreme code model.
1366+
// LoongArch supports IE to LE, DESC GD/LD to IE/LE optimizations in
1367+
// non-extreme code model.
13791368
bool execOptimizeInLoongArch =
13801369
ctx.arg.emachine == EM_LOONGARCH &&
1381-
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12);
1370+
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12 ||
1371+
type == R_LARCH_TLS_DESC_PC_HI20 || type == R_LARCH_TLS_DESC_PC_LO12 ||
1372+
type == R_LARCH_TLS_DESC_LD || type == R_LARCH_TLS_DESC_CALL ||
1373+
type == R_LARCH_TLS_DESC_PCREL20_S2);
13821374

13831375
// ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
13841376
// optimizations.
@@ -1437,9 +1429,23 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
14371429
return 1;
14381430
}
14391431

1432+
// LoongArch does not support transition from TLSDESC to LE/IE in the extreme
1433+
// code model, in which NEEDS_TLSDESC should set, rather than NEEDS_TLSGD. So
1434+
// we check independently.
1435+
if (ctx.arg.emachine == EM_LOONGARCH &&
1436+
oneof<RE_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
1437+
R_TLSDESC_CALL>(expr) &&
1438+
!execOptimize) {
1439+
if (expr != R_TLSDESC_CALL) {
1440+
sym.setFlags(NEEDS_TLSDESC);
1441+
sec->addReloc({expr, type, offset, addend, &sym});
1442+
}
1443+
return 1;
1444+
}
1445+
14401446
if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
14411447
R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC,
1442-
RE_LOONGARCH_TLSGD_PAGE_PC>(expr)) {
1448+
RE_LOONGARCH_TLSGD_PAGE_PC, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr)) {
14431449
if (!execOptimize) {
14441450
sym.setFlags(NEEDS_TLSGD);
14451451
sec->addReloc({expr, type, offset, addend, &sym});

lld/ELF/Relocations.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ enum RelExpr {
131131
RE_LOONGARCH_GOT_PAGE_PC,
132132
RE_LOONGARCH_TLSGD_PAGE_PC,
133133
RE_LOONGARCH_TLSDESC_PAGE_PC,
134+
RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC,
134135
};
135136

136137
// Architecture-neutral representation of relocation.

0 commit comments

Comments
 (0)