@@ -46,6 +46,8 @@ class LoongArch final : public TargetInfo {
4646private:
4747 void tlsdescToIe (uint8_t *loc, const Relocation &rel, uint64_t val) const ;
4848 void tlsdescToLe (uint8_t *loc, const Relocation &rel, uint64_t val) const ;
49+ bool tryGotToPCRel (uint8_t *loc, const Relocation &rHi20,
50+ const Relocation &rLo12, uint64_t secAddr) const ;
4951};
5052} // end anonymous namespace
5153
@@ -1155,6 +1157,78 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
11551157 }
11561158}
11571159
1160+ // Try GOT indirection to PC relative optimization.
1161+ // From:
1162+ // * pcalau12i $a0, %got_pc_hi20(sym_got)
1163+ // * ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
1164+ // To:
1165+ // * pcalau12i $a0, %pc_hi20(sym)
1166+ // * addi.w/d $a0, $a0, %pc_lo12(sym)
1167+ //
1168+ // Note: Althouth the optimization has been performed, the GOT entries still
1169+ // exists, similarly to AArch64. Eliminating the entries will increase code
1170+ // complexity.
1171+ bool LoongArch::tryGotToPCRel (uint8_t *loc, const Relocation &rHi20,
1172+ const Relocation &rLo12, uint64_t secAddr) const {
1173+ // Check if the relocations apply to consecutive instructions.
1174+ if (rHi20.offset + 4 != rLo12.offset )
1175+ return false ;
1176+
1177+ // Check if the relocations reference the same symbol and skip undefined,
1178+ // preemptible and STT_GNU_IFUNC symbols.
1179+ if (!rHi20.sym || rHi20.sym != rLo12.sym || !rHi20.sym ->isDefined () ||
1180+ rHi20.sym ->isPreemptible || rHi20.sym ->isGnuIFunc ())
1181+ return false ;
1182+
1183+ // GOT references to absolute symbols can't be relaxed to use PCALAU12I/ADDI
1184+ // in position-independent code because these instructions produce a relative
1185+ // address.
1186+ if ((ctx.arg .isPic && !cast<Defined>(*rHi20.sym ).section ))
1187+ return false ;
1188+
1189+ // Check if the addends of the both relocations are zero.
1190+ if (rHi20.addend != 0 || rLo12.addend != 0 )
1191+ return false ;
1192+
1193+ const uint32_t currInsn = read32le (loc);
1194+ const uint32_t nextInsn = read32le (loc + 4 );
1195+ const uint32_t ldOpcode = ctx.arg .is64 ? LD_D : LD_W;
1196+ // Check if the first instruction is PCALAU12I and the second instruction is
1197+ // LD.
1198+ if ((currInsn & 0xfe000000 ) != PCALAU12I ||
1199+ (nextInsn & 0xffc00000 ) != ldOpcode)
1200+ return false ;
1201+
1202+ // Check if use the same register.
1203+ if (getD5 (currInsn) != getJ5 (nextInsn) || getJ5 (nextInsn) != getD5 (nextInsn))
1204+ return false ;
1205+
1206+ Symbol &sym = *rHi20.sym ;
1207+ uint64_t symLocal = sym.getVA (ctx);
1208+ const int64_t displace = symLocal - getLoongArchPage (secAddr + rHi20.offset );
1209+ // Check if the symbol address is in
1210+ // [(PC & ~0xfff) - 2GiB - 0x800, (PC & ~0xfff) + 2GiB - 0x800).
1211+ const int64_t underflow = -0x80000000LL - 0x800 ;
1212+ const int64_t overflow = 0x80000000LL - 0x800 ;
1213+ if (!(displace >= underflow && displace < overflow))
1214+ return false ;
1215+
1216+ Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset ,
1217+ rHi20.addend , &sym};
1218+ Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset , rLo12.addend ,
1219+ &sym};
1220+ uint64_t pageDelta =
1221+ getLoongArchPageDelta (symLocal, secAddr + rHi20.offset , rHi20.type );
1222+ // pcalau12i $a0, %pc_hi20
1223+ write32le (loc, insn (PCALAU12I, getD5 (currInsn), 0 , 0 ));
1224+ relocate (loc, newRHi20, pageDelta);
1225+ // addi.w/d $a0, $a0, %pc_lo12
1226+ write32le (loc + 4 , insn (ctx.arg .is64 ? ADDI_D : ADDI_W, getD5 (nextInsn),
1227+ getJ5 (nextInsn), 0 ));
1228+ relocate (loc + 4 , newRLo12, SignExtend64 (symLocal, 64 ));
1229+ return true ;
1230+ }
1231+
11581232// During TLSDESC GD_TO_IE, the converted code sequence always includes an
11591233// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
11601234// in `getRelocTargetVA`, expr of this instruction should be adjusted to
@@ -1172,6 +1246,30 @@ RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
11721246 return expr;
11731247}
11741248
1249+ static bool pairForGotRels (ArrayRef<Relocation> relocs) {
1250+ // Check if R_LARCH_GOT_PC_HI20 and R_LARCH_GOT_PC_LO12 always appear in
1251+ // pairs.
1252+ size_t i = 0 ;
1253+ const size_t size = relocs.size ();
1254+ for (; i != size; ++i) {
1255+ if (relocs[i].type == R_LARCH_GOT_PC_HI20) {
1256+ if (i + 1 < size && relocs[i + 1 ].type == R_LARCH_GOT_PC_LO12) {
1257+ ++i;
1258+ continue ;
1259+ }
1260+ if (relaxable (relocs, i) && i + 2 < size &&
1261+ relocs[i + 2 ].type == R_LARCH_GOT_PC_LO12) {
1262+ i += 2 ;
1263+ continue ;
1264+ }
1265+ break ;
1266+ } else if (relocs[i].type == R_LARCH_GOT_PC_LO12) {
1267+ break ;
1268+ }
1269+ }
1270+ return i == size;
1271+ }
1272+
11751273void LoongArch::relocateAlloc (InputSectionBase &sec, uint8_t *buf) const {
11761274 const unsigned bits = ctx.arg .is64 ? 64 : 32 ;
11771275 uint64_t secAddr = sec.getOutputSection ()->addr ;
@@ -1181,6 +1279,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
11811279 secAddr += ehIn->getParent ()->outSecOff ;
11821280 bool isExtreme = false , isRelax = false ;
11831281 const MutableArrayRef<Relocation> relocs = sec.relocs ();
1282+ const bool isPairForGotRels = pairForGotRels (relocs);
11841283 for (size_t i = 0 , size = relocs.size (); i != size; ++i) {
11851284 Relocation &rel = relocs[i];
11861285 uint8_t *loc = buf + rel.offset ;
@@ -1264,6 +1363,24 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
12641363 tlsdescToLe (loc, rel, val);
12651364 }
12661365 continue ;
1366+ case RE_LOONGARCH_GOT_PAGE_PC:
1367+ // In LoongArch, we try GOT indirection to PC relative optimization in
1368+ // normal or medium code model, whether or not with R_LARCH_RELAX
1369+ // relocation. Moreover, if the original code sequence can be relaxed to a
1370+ // single instruction `pcaddi`, the first instruction will be removed and
1371+ // it will not reach here.
1372+ if (isPairForGotRels && rel.type == R_LARCH_GOT_PC_HI20) {
1373+ bool isRelax = relaxable (relocs, i);
1374+ const Relocation lo12Rel = isRelax ? relocs[i + 2 ] : relocs[i + 1 ];
1375+ if (lo12Rel.type == R_LARCH_GOT_PC_LO12 &&
1376+ tryGotToPCRel (loc, rel, lo12Rel, secAddr)) {
1377+ // isRelax: skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12
1378+ // !isRelax: skip relocation R_LARCH_GOT_PC_LO12
1379+ i += isRelax ? 2 : 1 ;
1380+ continue ;
1381+ }
1382+ }
1383+ break ;
12671384 default :
12681385 break ;
12691386 }
0 commit comments