@@ -39,7 +39,15 @@ class LoongArch final : public TargetInfo {
3939 void relocate (uint8_t *loc, const Relocation &rel,
4040 uint64_t val) const override ;
4141 bool relaxOnce (int pass) const override ;
42+ RelExpr adjustTlsExpr (RelType type, RelExpr expr) const override ;
43+ void relocateAlloc (InputSectionBase &sec, uint8_t *buf) const override ;
4244 void finalizeRelax (int passes) const override ;
45+
46+ private:
47+ void tlsdescToIe (uint8_t *loc, const Relocation &rel, uint64_t val,
48+ bool isExtreme) const ;
49+ void tlsdescToLe (uint8_t *loc, const Relocation &rel, uint64_t val,
50+ bool isExtreme) const ;
4351};
4452} // end anonymous namespace
4553
@@ -53,16 +61,23 @@ enum Op {
5361 ADDI_W = 0x02800000 ,
5462 ADDI_D = 0x02c00000 ,
5563 ANDI = 0x03400000 ,
64+ ORI = 0x03800000 ,
65+ LU12I_W = 0x14000000 ,
66+ LU32I_D = 0x16000000 ,
67+ LU52I_D = 0x03000000 ,
68+ PCALAU12I = 0x1a000000 ,
5669 PCADDU12I = 0x1c000000 ,
5770 LD_W = 0x28800000 ,
5871 LD_D = 0x28c00000 ,
72+ LDX_D = 0x380c0000 ,
5973 JIRL = 0x4c000000 ,
6074};
6175
6276enum Reg {
6377 R_ZERO = 0 ,
6478 R_RA = 1 ,
6579 R_TP = 2 ,
80+ R_A0 = 4 ,
6681 R_T0 = 12 ,
6782 R_T1 = 13 ,
6883 R_T2 = 14 ,
@@ -131,6 +146,10 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
131146 return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1 )) - 1 )) >> end;
132147}
133148
149+ static uint32_t getD5 (uint64_t v) { return extractBits (v, 4 , 0 ); }
150+
151+ static uint32_t getJ5 (uint64_t v) { return extractBits (v, 9 , 5 ); }
152+
134153static uint32_t setD5k16 (uint32_t insn, uint32_t imm) {
135154 uint32_t immLo = extractBits (imm, 15 , 0 );
136155 uint32_t immHi = extractBits (imm, 20 , 16 );
@@ -812,6 +831,255 @@ static bool relax(Ctx &ctx, InputSection &sec) {
812831 return changed;
813832}
814833
834+ // Convert TLSDESC GD/LD to IE.
835+ // The code sequence obtained in the normal or medium code model is as follows:
836+ // * pcalau12i $a0, %ie_pc_hi20(sym_ie)
837+ // * ld.[wd] $a0, $a0, %ie_pc_lo12(sym_ie)
838+ //
839+ // The code sequence obtained in the extreme code model is as follows:
840+ // * pcalau12i $a0, %ie_pc_hi20(sym_ie_large)
841+ // * addi.d $a1, $r0, %ie_pc_lo12(sym_ie_large)
842+ // * lu32i.d $a1, %ie64_pc_lo20(sym_ie_large)
843+ // * lu52i.d $a1, $a1, %ie64_pc_hi12(sym_ie_large)
844+ // * ldx.d $a0, $a0, $a1
845+ //
846+ // The optimization for tlsdescToIe and tlsdescToLe in the extreme code model
847+ // always preserves the preceding code sequence and converts the last
848+ // instructions to NOP, while the optimization for the normal or medium code
849+ // models of tlsdescToIe and tlsdescToLe do the oppsite. This is because, in
850+ // the extreme code model, tlsdescToIe requires a temporary register $a1, and
851+ // the preceding unoptimized tls.desc code sequence contains this temporary
852+ // register, eliminating additional bookkeeping.
853+ void LoongArch::tlsdescToIe (uint8_t *loc, const Relocation &rel, uint64_t val,
854+ bool isExtreme) const {
855+ if (isExtreme) { // extreme
856+ const uint32_t currInsn = read32le (loc);
857+ switch (rel.type ) {
858+ case R_LARCH_TLS_DESC_PC_HI20:
859+ write32le (loc, insn (PCALAU12I, R_A0, 0 , 0 )); // pcalau12i $a0, %ie_pc_hi20
860+ relocateNoSym (loc, R_LARCH_TLS_IE_PC_HI20, val);
861+ break ;
862+ case R_LARCH_TLS_DESC_PC_LO12:
863+ write32le (loc, insn (ADDI_D, getD5 (currInsn), R_ZERO,
864+ 0 )); // addi.d $a1, $r0, %ie_pc_lo12
865+ relocateNoSym (loc, R_LARCH_TLS_IE_PC_LO12, val);
866+ break ;
867+ case R_LARCH_TLS_DESC64_PC_LO20:
868+ write32le (loc, insn (LU32I_D, getD5 (currInsn), 0 ,
869+ 0 )); // lu32i.d $a1, %ie64_pc_lo20
870+ relocateNoSym (loc, R_LARCH_TLS_IE64_PC_LO20, val);
871+ break ;
872+ case R_LARCH_TLS_DESC64_PC_HI12:
873+ write32le (loc, insn (LU52I_D, getD5 (currInsn), getJ5 (currInsn),
874+ 0 )); // lu52i.d $a1, $a1, %ie64_pc_hi12
875+ relocateNoSym (loc, R_LARCH_TLS_IE64_PC_HI12, val);
876+ write32le (loc + 4 , insn (LDX_D, R_A0, R_A0,
877+ getD5 (currInsn))); // ldx.d $a0, $a0, $a1
878+ break ;
879+ case R_LARCH_TLS_DESC_LD:
880+ case R_LARCH_TLS_DESC_CALL:
881+ write32le (loc, insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
882+ break ;
883+ default :
884+ llvm_unreachable (" unsupported relocation for TLSDESC to IE" );
885+ }
886+ } else { // normal or medium
887+ switch (rel.type ) {
888+ case R_LARCH_TLS_DESC_PC_HI20:
889+ case R_LARCH_TLS_DESC_PC_LO12:
890+ case R_LARCH_TLS_DESC_PCREL20_S2:
891+ write32le (loc, insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
892+ break ;
893+ case R_LARCH_TLS_DESC_LD:
894+ write32le (loc, insn (PCALAU12I, R_A0, 0 , 0 )); // pcalau12i $a0, %ie_pc_hi20
895+ relocateNoSym (loc, R_LARCH_TLS_IE_PC_HI20, val);
896+ break ;
897+ case R_LARCH_TLS_DESC_CALL:
898+ write32le (loc, insn (ctx.arg .is64 ? LD_D : LD_W, R_A0, R_A0,
899+ 0 )); // ld.[wd] $a0, $a0, %ie_pc_lo12
900+ relocateNoSym (loc, R_LARCH_TLS_IE_PC_LO12, val);
901+ break ;
902+ default :
903+ llvm_unreachable (" unsupported relocation for TLSDESC to IE" );
904+ }
905+ }
906+ }
907+
908+ // Convert TLSDESC GD/LD to LE.
909+ // The code sequence obtained in the normal or medium code model is as follows:
910+ // * lu12i.w $a0, %le_hi20(sym_le) # le_hi20 != 0
911+ // * ori $a0 $a0, %le_lo12(sym_le)
912+ //
913+ // The code sequence obtained in extreme code model is as follows:
914+ // * lu12i.w $a0, %le_hi20(sym_le_large)
915+ // * ori $a0, $a0, %le_lo12(sym_le_large)
916+ // * lu32i.d $a0, %le64_lo20(sym_le_large)
917+ // * lu52i.d $a0, $a0, %le64_hi20(sym_le_large)
918+ //
919+ // Note: In the extreme code model, it is possible for the generated code
920+ // sequence to include NOPs at both beginning and the end. Likely,
921+ // * nop; ori $a0, $r0, %le_lo12; nop; nop
922+ // This occurs because the four instructions are used to assemble each part of a
923+ // 64-bit value independently, without affecting each other. Therefore, to
924+ // obtain an efficient code sequence, NOPs are used as much as possible.
925+ // Additionally, the extreme code model does not participate in relaxation
926+ // optimization.
927+ void LoongArch::tlsdescToLe (uint8_t *loc, const Relocation &rel, uint64_t val,
928+ bool isExtreme) const {
929+ if (isExtreme) { // extreme
930+ switch (rel.type ) {
931+ case R_LARCH_TLS_DESC_PC_HI20:
932+ if (uint32_t hi20 = extractBits (val, 31 , 12 ))
933+ write32le (loc,
934+ insn (LU12I_W, R_A0, hi20, 0 )); // lu12i.w $a0, $a0, %le_hi20
935+ else
936+ write32le (loc, insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
937+ break ;
938+ case R_LARCH_TLS_DESC_PC_LO12:
939+ if (extractBits (val, 31 , 12 ))
940+ write32le (loc,
941+ insn (ORI, R_A0, R_A0, lo12 (val))); // ori $a0, $a0, %le_lo12
942+ else
943+ write32le (loc,
944+ insn (ORI, R_A0, R_ZERO, lo12 (val))); // ori $a0, $r0, %le_lo12
945+ break ;
946+ case R_LARCH_TLS_DESC64_PC_LO20:
947+ // If val[31] is 1, lu12i.w will set $a0[51-32]. So, clear it.
948+ if (uint32_t lo20 = extractBits (val, 51 , 32 ) || extractBits (val, 31 , 31 ))
949+ write32le (loc, insn (LU32I_D, R_A0, lo20, 0 )); // lu32i.d $a0, %le64_lo20
950+ else
951+ write32le (loc, insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
952+ break ;
953+ case R_LARCH_TLS_DESC64_PC_HI12:
954+ // If val[31] is 1, lu12i.w will set $a0[63-52]. So, clear it.
955+ if (uint32_t hi12 =
956+ extractBits (val, 63 , 52 ) || extractBits (val, 31 , 31 )) {
957+ write32le (loc, insn (LU52I_D, R_A0, R_A0,
958+ hi12)); // lu52i.d $a0, $a0, %le64_hi20
959+ // Due to add.d does not include relocation, an additional NOP needs to
960+ // be generated.
961+ write32le (loc + 4 , insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
962+ } else {
963+ write32le (loc, insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
964+ write32le (loc + 4 , insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
965+ }
966+ break ;
967+ case R_LARCH_TLS_DESC_LD:
968+ case R_LARCH_TLS_DESC_CALL:
969+ write32le (loc, insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
970+ break ;
971+ default :
972+ llvm_unreachable (" unsupported relocation for TLSDESC to LE" );
973+ }
974+ } else { // normal or medium
975+ switch (rel.type ) {
976+ case R_LARCH_TLS_DESC_PC_HI20:
977+ case R_LARCH_TLS_DESC_PC_LO12:
978+ case R_LARCH_TLS_DESC_PCREL20_S2:
979+ write32le (loc, insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
980+ break ;
981+ case R_LARCH_TLS_DESC_LD:
982+ if (isUInt<12 >(val))
983+ write32le (loc, insn (ANDI, R_ZERO, R_ZERO, 0 )); // nop
984+ else if (isInt<32 >(val))
985+ write32le (loc, insn (LU12I_W, R_A0, extractBits (val, 31 , 12 ),
986+ 0 )); // lu12i.w $a0, %le_hi20
987+ else
988+ Err (ctx) << val
989+ << " exceeds the range of medium code model in tlsdescToLe" ;
990+ break ;
991+ case R_LARCH_TLS_DESC_CALL:
992+ if (isUInt<12 >(val))
993+ write32le (loc, insn (ORI, R_A0, R_ZERO, val)); // ori $a0, $r0, %le_lo12
994+ else if (isInt<32 >(val))
995+ write32le (loc,
996+ insn (ORI, R_A0, R_A0, lo12 (val))); // ori $a0, $a0, %le_lo12
997+ else
998+ Err (ctx) << val
999+ << " exceeds the range of medium code model in tlsdescToLe" ;
1000+ break ;
1001+ default :
1002+ llvm_unreachable (" unsupported relocation for TLSDESC to LE" );
1003+ }
1004+ }
1005+ }
1006+
1007+ // During GD_TO_IE, the converted code sequence always includes an instruction
1008+ // related to the Lo12 relocation (ld.[wd] or addi.d). To obtain correct val in
1009+ // `getRelocTargetVA`, expr of this instruction should be adjusted to
1010+ // R_RELAX_TLS_GD_TO_IE_ABS, while expr of other valid instructions (not NOP)
1011+ // should be adjusted to RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC.
1012+ // See the comment in tlsdescToIe for detailed information.
1013+ //
1014+ // Specifically, in the normal or medium code model, the instruction with
1015+ // relocation R_LARCH_TLS_DESC_CALL is the candidate of Lo12 relocation. And in
1016+ // the extreme code model, the instruction with R_LARCH_TLS_DESC_PC_LO12 is the
1017+ // candidate. Meanwhile, in the normal or medium code model, the instruction
1018+ // with R_LARCH_TLS_DESC_PC_LO12 will always be converted to NOP. Similarly, in
1019+ // the extreme code model, the instruction with R_LARCH_TLS_DESC_CALL will be
1020+ // converted to NOP. Therefore, the adjustment of the expr here is safe.
1021+ RelExpr LoongArch::adjustTlsExpr (RelType type, RelExpr expr) const {
1022+ if (expr == R_RELAX_TLS_GD_TO_IE) {
1023+ if (type != R_LARCH_TLS_DESC_PC_LO12 && type != R_LARCH_TLS_DESC_CALL)
1024+ return RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC;
1025+ return R_RELAX_TLS_GD_TO_IE_ABS;
1026+ }
1027+ return expr;
1028+ }
1029+
1030+ void LoongArch::relocateAlloc (InputSectionBase &sec, uint8_t *buf) const {
1031+ const unsigned bits = ctx.arg .is64 ? 64 : 32 ;
1032+ uint64_t secAddr = sec.getOutputSection ()->addr ;
1033+ if (auto *s = dyn_cast<InputSection>(&sec))
1034+ secAddr += s->outSecOff ;
1035+ else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
1036+ secAddr += ehIn->getParent ()->outSecOff ;
1037+ bool isExtreme = false ;
1038+ const ArrayRef<Relocation> relocs = sec.relocs ();
1039+ for (size_t i = 0 , size = relocs.size (); i != size; ++i) {
1040+ const Relocation &rel = relocs[i];
1041+ uint8_t *loc = buf + rel.offset ;
1042+ const uint64_t val = SignExtend64 (
1043+ sec.getRelocTargetVA (ctx, rel, secAddr + rel.offset ), bits);
1044+
1045+ switch (rel.expr ) {
1046+ case R_RELAX_HINT:
1047+ continue ;
1048+ case RE_LOONGARCH_RELAX_TLS_GD_TO_IE_PAGE_PC:
1049+ if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
1050+ // The relocation sequence in the extreme code model is as follows:
1051+ //
1052+ // * i -- R_LARCH_TLS_DESC_PC_HI20
1053+ // * i+1 -- R_LARCH_TLS_DESC_PC_LO12
1054+ // * i+2 -- R_LARCH_TLS_DESC64_PC_LO20
1055+ // * i+3 -- R_LARCH_TLS_DESC64_PC_HI12
1056+ isExtreme =
1057+ (i + 2 < size && relocs[i + 2 ].type == R_LARCH_TLS_DESC64_PC_LO20);
1058+ }
1059+ [[fallthrough]];
1060+ case R_RELAX_TLS_GD_TO_IE_ABS:
1061+ tlsdescToIe (loc, rel, val, isExtreme);
1062+ continue ;
1063+ case R_RELAX_TLS_GD_TO_LE:
1064+ if (rel.type == R_LARCH_TLS_DESC_PC_HI20) {
1065+ // The relocation sequence in the extreme code model is as follows:
1066+ //
1067+ // * i -- R_LARCH_TLS_DESC_PC_HI20
1068+ // * i+1 -- R_LARCH_TLS_DESC_PC_LO12
1069+ // * i+2 -- R_LARCH_TLS_DESC64_PC_LO20
1070+ // * i+3 -- R_LARCH_TLS_DESC64_PC_HI12
1071+ isExtreme =
1072+ (i + 2 < size && relocs[i + 2 ].type == R_LARCH_TLS_DESC64_PC_LO20);
1073+ }
1074+ tlsdescToLe (loc, rel, val, isExtreme);
1075+ continue ;
1076+ default :
1077+ break ;
1078+ }
1079+ relocate (loc, rel, val);
1080+ }
1081+ }
1082+
8151083// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
8161084// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
8171085// shrinkage may reduce displacement and make more relocations eligible for
0 commit comments