@@ -53,6 +53,7 @@ enum Op {
5353 ADDI_W = 0x02800000 ,
5454 ADDI_D = 0x02c00000 ,
5555 ANDI = 0x03400000 ,
56+ PCADDI = 0x18000000 ,
5657 PCADDU12I = 0x1c000000 ,
5758 LD_W = 0x28800000 ,
5859 LD_D = 0x28c00000 ,
@@ -131,6 +132,8 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
131132 return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1 )) - 1 )) >> end;
132133}
133134
135+ static uint32_t getD5 (uint64_t v) { return extractBits (v, 4 , 0 ); }
136+
134137static uint32_t setD5k16 (uint32_t insn, uint32_t imm) {
135138 uint32_t immLo = extractBits (imm, 15 , 0 );
136139 uint32_t immHi = extractBits (imm, 20 , 16 );
@@ -743,6 +746,78 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
743746 }
744747}
745748
749+ static bool relaxable (ArrayRef<Relocation> relocs, size_t i) {
750+ return i + 1 < relocs.size () && relocs[i + 1 ].type == R_LARCH_RELAX;
751+ }
752+
753+ static bool isPairRelaxable (ArrayRef<Relocation> relocs, size_t i) {
754+ return relaxable (relocs, i) && relaxable (relocs, i + 2 ) &&
755+ relocs[i].offset + 4 == relocs[i + 2 ].offset ;
756+ }
757+
758+ // Relax code sequence.
759+ // From:
760+ // pcalau12i $a0, %pc_hi20(sym)
761+ // addi.w/d $a0, $a0, %pc_lo12(sym)
762+ // To:
763+ // pcaddi $a0, %pc_lo12(sym)
764+ //
765+ // From:
766+ // pcalau12i $a0, %got_pc_hi20(sym_got)
767+ // ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
768+ // To:
769+ // pcaddi $a0, %got_pc_hi20(sym_got)
770+ static void relaxPCHi20Lo12 (Ctx &ctx, const InputSection &sec, size_t i,
771+ uint64_t loc, Relocation &rHi20, Relocation &rLo12,
772+ uint32_t &remove) {
773+ // check if the relocations are relaxable sequences.
774+ if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
775+ rLo12.type == R_LARCH_PCALA_LO12) ||
776+ (rHi20.type == R_LARCH_GOT_PC_HI20 &&
777+ rLo12.type == R_LARCH_GOT_PC_LO12)))
778+ return ;
779+
780+ // GOT references to absolute symbols can't be relaxed to use pcaddi in
781+ // position-independent code, because these instructions produce a relative
782+ // address.
783+ // Meanwhile skip undefined, preemptible and STT_GNU_IFUNC symbols, because
784+ // these symbols may be resolve in runtime.
785+ if (rHi20.type == R_LARCH_GOT_PC_HI20 &&
786+ (!rHi20.sym ->isDefined () || rHi20.sym ->isPreemptible ||
787+ rHi20.sym ->isGnuIFunc () ||
788+ (ctx.arg .isPic && !cast<Defined>(*rHi20.sym ).section )))
789+ return ;
790+
791+ uint64_t symBase = 0 ;
792+ if (rHi20.expr == RE_LOONGARCH_PLT_PAGE_PC)
793+ symBase = rHi20.sym ->getPltVA (ctx);
794+ else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
795+ rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
796+ symBase = rHi20.sym ->getVA (ctx);
797+ else {
798+ Err (ctx) << getErrorLoc (ctx, (const uint8_t *)loc) << " unknown expr ("
799+ << rHi20.expr << " ) against symbol " << rHi20.sym
800+ << " in relaxPCHi20Lo12" ;
801+ return ;
802+ }
803+ const uint64_t symLocal = symBase + rHi20.addend ;
804+
805+ const int64_t distance = symLocal - loc;
806+ // Check if the distance aligns 4 bytes or exceeds the range of pcaddi.
807+ if ((distance & 0x3 ) != 0 || !isInt<22 >(distance))
808+ return ;
809+
810+ // Note: If we can ensure that the .o files generated by LLVM only contain
811+ // relaxable instruction sequences with R_LARCH_RELAX, then we do not need to
812+ // check instruction opcodes.
813+ const uint32_t nextInsn = read32le (sec.content ().data () + rLo12.offset );
814+
815+ sec.relaxAux ->relocTypes [i] = R_LARCH_RELAX;
816+ sec.relaxAux ->relocTypes [i + 2 ] = R_LARCH_PCREL20_S2;
817+ sec.relaxAux ->writes .push_back (insn (PCADDI, getD5 (nextInsn), 0 , 0 ));
818+ remove = 4 ;
819+ }
820+
746821static bool relax (Ctx &ctx, InputSection &sec) {
747822 const uint64_t secAddr = sec.getVA ();
748823 const MutableArrayRef<Relocation> relocs = sec.relocs ();
@@ -781,6 +856,12 @@ static bool relax(Ctx &ctx, InputSection &sec) {
781856 }
782857 break ;
783858 }
859+ case R_LARCH_PCALA_HI20:
860+ case R_LARCH_GOT_PC_HI20:
861+ // The overflow check for i+2 will be carried out in isPairRelaxable.
862+ if (isPairRelaxable (relocs, i))
863+ relaxPCHi20Lo12 (ctx, sec, i, loc, r, relocs[i + 2 ], remove);
864+ break ;
784865 }
785866
786867 // For all anchors whose offsets are <= r.offset, they are preceded by
@@ -851,6 +932,7 @@ void LoongArch::finalizeRelax(int passes) const {
851932 MutableArrayRef<Relocation> rels = sec->relocs ();
852933 ArrayRef<uint8_t > old = sec->content ();
853934 size_t newSize = old.size () - aux.relocDeltas [rels.size () - 1 ];
935+ size_t writesIdx = 0 ;
854936 uint8_t *p = ctx.bAlloc .Allocate <uint8_t >(newSize);
855937 uint64_t offset = 0 ;
856938 int64_t delta = 0 ;
@@ -867,11 +949,29 @@ void LoongArch::finalizeRelax(int passes) const {
867949 continue ;
868950
869951 // Copy from last location to the current relocated location.
870- const Relocation &r = rels[i];
952+ Relocation &r = rels[i];
871953 uint64_t size = r.offset - offset;
872954 memcpy (p, old.data () + offset, size);
873955 p += size;
874- offset = r.offset + remove;
956+
957+ int64_t skip = 0 ;
958+ if (RelType newType = aux.relocTypes [i]) {
959+ switch (newType) {
960+ case R_LARCH_RELAX:
961+ break ;
962+ case R_LARCH_PCREL20_S2:
963+ skip = 4 ;
964+ write32le (p, aux.writes [writesIdx++]);
965+ // RelExpr is needed for relocating.
966+ r.expr = r.sym ->hasFlag (NEEDS_PLT) ? R_PLT_PC : R_PC;
967+ break ;
968+ default :
969+ llvm_unreachable (" unsupported type" );
970+ }
971+ }
972+
973+ p += skip;
974+ offset = r.offset + skip + remove;
875975 }
876976 memcpy (p, old.data () + offset, old.size () - offset);
877977
0 commit comments