Skip to content

Commit dff3031

Browse files
committed
[lld][LoongArch] Implement TLSDESC GD/LD to IE/LE.
Support TLSDESC to initial-exec or local-exec optimizations. Introduce a new relocation expression RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC and use the existing R_RELAX_TLS_GD_TO_IE_ABS to support TLSDESC => IE, and use the existing R_RELAX_TLS_GD_TO_LE to support TLSDESC => LE. In the normal or medium code model, there are two forms of code sequences: * pcalau12i $a0, %desc_pc_hi20(sym_desc) * addi.d $a0, $a0, %desc_pc_lo12(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) ------ * pcaddi $a0, %desc_pcrel_20(sym_desc) * ld.d $ra, $a0, %desc_ld(sym_desc) * jirl $ra, $ra, %desc_call(sym_desc) The code sequence obtained after the TLSDESC => IE conversion is as follows: * pcalau12i $a0, %ie_pc_hi20(sym_ie) * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie) For simplicity, in both tlsdescToIe and tlsdescToLe we always convert the preceding instructions to NOPs, because both forms of the code sequence (corresponding to the relocation combinations R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and R_LARCH_TLS_DESC_PCREL20_S2) are processed in the same way. FIXME: When relaxation is enabled, the redundant NOPs can be removed. This will be implemented in a future patch. Note: TLSDESC code sequences of any form should not appear interleaved in the normal, medium or extreme code model: compilers do not generate such code and lld does not support it. This is thanks to the guard in PostRASchedulerList.cpp in llvm. ``` Calls are not scheduling boundaries before register allocation, but post-ra we don't gain anything by scheduling across calls since we don't need to worry about register pressure. ```
1 parent 9d99de8 commit dff3031

File tree

4 files changed

+169
-17
lines changed

4 files changed

+169
-17
lines changed

lld/ELF/Arch/LoongArch.cpp

Lines changed: 145 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,14 @@ class LoongArch final : public TargetInfo {
3939
void relocate(uint8_t *loc, const Relocation &rel,
4040
uint64_t val) const override;
4141
bool relaxOnce(int pass) const override;
42+
RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
4243
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
4344
void finalizeRelax(int passes) const override;
4445

4546
private:
4647
void tlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
48+
void tlsdescToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
49+
void tlsdescToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
4750
};
4851
} // end anonymous namespace
4952

@@ -61,6 +64,7 @@ enum Op {
6164
LU12I_W = 0x14000000,
6265
PCADDI = 0x18000000,
6366
PCADDU12I = 0x1c000000,
67+
PCALAU12I = 0x1a000000,
6468
LD_W = 0x28800000,
6569
LD_D = 0x28c00000,
6670
JIRL = 0x4c000000,
@@ -72,6 +76,7 @@ enum Reg {
7276
R_ZERO = 0,
7377
R_RA = 1,
7478
R_TP = 2,
79+
R_A0 = 4,
7580
R_T0 = 12,
7681
R_T1 = 13,
7782
R_T2 = 14,
@@ -962,7 +967,8 @@ static bool relax(Ctx &ctx, InputSection &sec) {
962967
case R_LARCH_TLS_LD_PC_HI20:
963968
case R_LARCH_TLS_DESC_PC_HI20:
964969
// The overflow check for i+2 will be carried out in isPairRelaxable.
965-
if (isPairRelaxable(relocs, i))
970+
if (r.expr != RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC &&
971+
r.expr != R_RELAX_TLS_GD_TO_LE && isPairRelaxable(relocs, i))
966972
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
967973
break;
968974
case R_LARCH_CALL36:
@@ -1047,6 +1053,103 @@ void LoongArch::tlsIeToLe(uint8_t *loc, const Relocation &rel,
10471053
}
10481054
}
10491055

1056+
// Convert TLSDESC GD/LD to IE.
1057+
// In normal or medium code model, there are two forms of code sequences:
1058+
// * pcalau12i $a0, %desc_pc_hi20(sym_desc)
1059+
// * addi.d $a0, $a0, %desc_pc_lo12(sym_desc)
1060+
// * ld.d $ra, $a0, %desc_ld(sym_desc)
1061+
// * jirl $ra, $ra, %desc_call(sym_desc)
1062+
// ------
1063+
// * pcaddi $a0, %desc_pcrel_20(a)
1064+
// * load $ra, $a0, %desc_ld(a)
1065+
// * jirl $ra, $ra, %desc_call(a)
1066+
//
1067+
// The code sequence obtained is as follows:
1068+
// * pcalau12i $a0, %ie_pc_hi20(sym_ie)
1069+
// * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie)
1070+
//
1071+
// Simplicity, whether tlsdescToIe or tlsdescToLe, we always tend to convert the
1072+
// preceding instructions to NOPs, due to both forms of code sequence
1073+
// (corresponding to relocation combinations:
1074+
// R_LARCH_TLS_DESC_PC_HI20+R_LARCH_TLS_DESC_PC_LO12 and
1075+
// R_LARCH_TLS_DESC_PCREL20_S2) have same process.
1076+
//
1077+
// When relaxation enables, redundant NOPs can be removed.
1078+
void LoongArch::tlsdescToIe(uint8_t *loc, const Relocation &rel,
1079+
uint64_t val) const {
1080+
switch (rel.type) {
1081+
case R_LARCH_TLS_DESC_PC_HI20:
1082+
case R_LARCH_TLS_DESC_PC_LO12:
1083+
case R_LARCH_TLS_DESC_PCREL20_S2:
1084+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1085+
break;
1086+
case R_LARCH_TLS_DESC_LD:
1087+
write32le(loc, insn(PCALAU12I, R_A0, 0, 0)); // pcalau12i $a0, %ie_pc_hi20
1088+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_HI20, val);
1089+
break;
1090+
case R_LARCH_TLS_DESC_CALL:
1091+
write32le(loc, insn(ctx.arg.is64 ? LD_D : LD_W, R_A0, R_A0,
1092+
0)); // ld.[wd] $a0, $a0, %ie_pc_lo12
1093+
relocateNoSym(loc, R_LARCH_TLS_IE_PC_LO12, val);
1094+
break;
1095+
default:
1096+
llvm_unreachable("unsupported relocation for TLSDESC to IE");
1097+
}
1098+
}
1099+
1100+
// Convert TLSDESC GD/LD to LE.
1101+
// The code sequence obtained in the normal or medium code model is as follows:
1102+
// * lu12i.w $a0, %le_hi20(sym_le) # le_hi20 != 0
1103+
// * ori $a0 $a0, %le_lo12(sym_le)
1104+
// See the comment in tlsdescToIe for detailed information.
1105+
void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
1106+
uint64_t val) const {
1107+
assert(isInt<32>(val) &&
1108+
"val exceeds the range of medium code model in tlsdescToLe");
1109+
1110+
bool isUInt12 = isUInt<12>(val);
1111+
switch (rel.type) {
1112+
case R_LARCH_TLS_DESC_PC_HI20:
1113+
case R_LARCH_TLS_DESC_PC_LO12:
1114+
case R_LARCH_TLS_DESC_PCREL20_S2:
1115+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1116+
break;
1117+
case R_LARCH_TLS_DESC_LD:
1118+
if (isUInt12)
1119+
write32le(loc, insn(ANDI, R_ZERO, R_ZERO, 0)); // nop
1120+
else
1121+
write32le(loc, insn(LU12I_W, R_A0, extractBits(val, 31, 12),
1122+
0)); // lu12i.w $a0, %le_hi20
1123+
break;
1124+
case R_LARCH_TLS_DESC_CALL:
1125+
if (isUInt12)
1126+
write32le(loc, insn(ORI, R_A0, R_ZERO, val)); // ori $a0, $r0, %le_lo12
1127+
else
1128+
write32le(loc,
1129+
insn(ORI, R_A0, R_A0, lo12(val))); // ori $a0, $a0, %le_lo12
1130+
break;
1131+
default:
1132+
llvm_unreachable("unsupported relocation for TLSDESC to LE");
1133+
}
1134+
}
1135+
1136+
// During TLSDESC GD_TO_IE, the converted code sequence always includes an
1137+
// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
1138+
// in `getRelocTargetVA`, expr of this instruction should be adjusted to
1139+
// R_RELAX_TLS_GD_TO_IE_ABS, while expr of other instructions related to the
1140+
// Hi20 relocation (pcalau12i) should be adjusted to
1141+
// RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC. Specifically, in the normal or
1142+
// medium code model, the instruction with relocation R_LARCH_TLS_DESC_CALL is
1143+
// the candidate of Lo12 relocation.
1144+
RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
1145+
if (expr == R_RELAX_TLS_GD_TO_IE) {
1146+
if (type != R_LARCH_TLS_DESC_CALL)
1147+
return RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC;
1148+
return R_RELAX_TLS_GD_TO_IE_ABS;
1149+
}
1150+
return expr;
1151+
}
1152+
10501153
void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
10511154
const unsigned bits = ctx.arg.is64 ? 64 : 32;
10521155
uint64_t secAddr = sec.getOutputSection()->addr;
@@ -1089,6 +1192,47 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
10891192
tlsIeToLe(loc, rel, val);
10901193
}
10911194
continue;
1195+
case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
1196+
if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
1197+
// LoongArch does not support TLSDESC GD/LD to LE/IE optimization in the
1198+
// extreme code model. In these cases, the relocs are as follows:
1199+
//
1200+
// * i -- R_LARCH_TLS_DESC_PC_HI20
1201+
// * i+1 -- R_LARCH_TLS_DESC_PC_LO12
1202+
// * i+2 -- R_LARCH_TLS_DESC64_PC_LO20
1203+
// * i+3 -- R_LARCH_TLS_DESC64_PC_HI12
1204+
isExtreme =
1205+
(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20);
1206+
}
1207+
[[fallthrough]];
1208+
case R_RELAX_TLS_GD_TO_IE_ABS:
1209+
if (isExtreme) {
1210+
if (rel.type == R_LARCH_TLS_DESC_CALL)
1211+
continue;
1212+
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
1213+
val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
1214+
bits);
1215+
relocateNoSym(loc, rel.type, val);
1216+
} else {
1217+
tlsdescToIe(loc, rel, val);
1218+
}
1219+
continue;
1220+
case R_RELAX_TLS_GD_TO_LE:
1221+
if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
1222+
isExtreme =
1223+
(i + 2 < size && relocs[i + 2].type == R_LARCH_TLS_DESC64_PC_LO20);
1224+
}
1225+
if (isExtreme) {
1226+
if (rel.type == R_LARCH_TLS_DESC_CALL)
1227+
continue;
1228+
rel.expr = getRelExpr(rel.type, *rel.sym, loc);
1229+
val = SignExtend64(sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset),
1230+
bits);
1231+
relocateNoSym(loc, rel.type, val);
1232+
} else {
1233+
tlsdescToLe(loc, rel, val);
1234+
}
1235+
continue;
10921236
default:
10931237
break;
10941238
}

lld/ELF/InputSection.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -825,6 +825,7 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
825825
case R_GOTPLT_PC:
826826
return r.sym->getGotPltVA(ctx) + a - p;
827827
case RE_LOONGARCH_GOT_PAGE_PC:
828+
case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
828829
if (r.sym->hasFlag(NEEDS_TLSGD))
829830
return getLoongArchPageDelta(ctx.in.got->getGlobalDynAddr(*r.sym) + a, p,
830831
r.type);

lld/ELF/Relocations.cpp

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,22 +1316,10 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
13161316
if (ctx.arg.emachine == EM_MIPS)
13171317
return handleMipsTlsRelocation(ctx, type, sym, *sec, offset, addend, expr);
13181318

1319-
// LoongArch does not yet implement transition from TLSDESC to LE/IE, so
1320-
// generate TLSDESC dynamic relocation for the dynamic linker to handle.
1321-
if (ctx.arg.emachine == EM_LOONGARCH &&
1322-
oneof<RE_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
1323-
R_TLSDESC_CALL>(expr)) {
1324-
if (expr != R_TLSDESC_CALL) {
1325-
sym.setFlags(NEEDS_TLSDESC);
1326-
sec->addReloc({expr, type, offset, addend, &sym});
1327-
}
1328-
return 1;
1329-
}
1330-
13311319
bool isRISCV = ctx.arg.emachine == EM_RISCV;
13321320

13331321
if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
1334-
R_TLSDESC_GOTPLT>(expr) &&
1322+
R_TLSDESC_GOTPLT, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr) &&
13351323
ctx.arg.shared) {
13361324
// R_RISCV_TLSDESC_{LOAD_LO12,ADD_LO12_I,CALL} reference a label. Do not
13371325
// set NEEDS_TLSDESC on the label.
@@ -1343,10 +1331,14 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
13431331
return 1;
13441332
}
13451333

1346-
// LoongArch support IE to LE optimization in non-extreme code model.
1334+
// LoongArch supports IE to LE and DESC GD/LD to IE/LE optimizations in
1335+
// non-extreme code models.
13471336
bool execOptimizeInLoongArch =
13481337
ctx.arg.emachine == EM_LOONGARCH &&
1349-
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12);
1338+
(type == R_LARCH_TLS_IE_PC_HI20 || type == R_LARCH_TLS_IE_PC_LO12 ||
1339+
type == R_LARCH_TLS_DESC_PC_HI20 || type == R_LARCH_TLS_DESC_PC_LO12 ||
1340+
type == R_LARCH_TLS_DESC_LD || type == R_LARCH_TLS_DESC_CALL ||
1341+
type == R_LARCH_TLS_DESC_PCREL20_S2);
13501342

13511343
// ARM, Hexagon, LoongArch and RISC-V do not support GD/LD to IE/LE
13521344
// optimizations.
@@ -1405,9 +1397,23 @@ unsigned RelocationScanner::handleTlsRelocation(RelExpr expr, RelType type,
14051397
return 1;
14061398
}
14071399

1400+
// LoongArch does not support transition from TLSDESC to LE/IE in the extreme
1401+
// code model, in which NEEDS_TLSDESC should be set rather than NEEDS_TLSGD,
1402+
// so we check this case independently.
1403+
if (ctx.arg.emachine == EM_LOONGARCH &&
1404+
oneof<RE_LOONGARCH_TLSDESC_PAGE_PC, R_TLSDESC, R_TLSDESC_PC,
1405+
R_TLSDESC_CALL>(expr) &&
1406+
!execOptimize) {
1407+
if (expr != R_TLSDESC_CALL) {
1408+
sym.setFlags(NEEDS_TLSDESC);
1409+
sec->addReloc({expr, type, offset, addend, &sym});
1410+
}
1411+
return 1;
1412+
}
1413+
14081414
if (oneof<RE_AARCH64_TLSDESC_PAGE, R_TLSDESC, R_TLSDESC_CALL, R_TLSDESC_PC,
14091415
R_TLSDESC_GOTPLT, R_TLSGD_GOT, R_TLSGD_GOTPLT, R_TLSGD_PC,
1410-
RE_LOONGARCH_TLSGD_PAGE_PC>(expr)) {
1416+
RE_LOONGARCH_TLSGD_PAGE_PC, RE_LOONGARCH_TLSDESC_PAGE_PC>(expr)) {
14111417
if (!execOptimize) {
14121418
sym.setFlags(NEEDS_TLSGD);
14131419
sec->addReloc({expr, type, offset, addend, &sym});

lld/ELF/Relocations.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ enum RelExpr {
129129
RE_LOONGARCH_GOT_PAGE_PC,
130130
RE_LOONGARCH_TLSGD_PAGE_PC,
131131
RE_LOONGARCH_TLSDESC_PAGE_PC,
132+
RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC,
132133
};
133134

134135
// Architecture-neutral representation of relocation.

0 commit comments

Comments
 (0)