@@ -53,6 +53,7 @@ enum Op {
5353 ADDI_W = 0x02800000 ,
5454 ADDI_D = 0x02c00000 ,
5555 ANDI = 0x03400000 ,
56+ PCADDI = 0x18000000 ,
5657 PCADDU12I = 0x1c000000 ,
5758 LD_W = 0x28800000 ,
5859 LD_D = 0x28c00000 ,
@@ -131,6 +132,8 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
131132 return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1 )) - 1 )) >> end;
132133}
133134
135+ static uint32_t getD5 (uint64_t v) { return extractBits (v, 4 , 0 ); }
136+
134137static uint32_t setD5k16 (uint32_t insn, uint32_t imm) {
135138 uint32_t immLo = extractBits (imm, 15 , 0 );
136139 uint32_t immHi = extractBits (imm, 20 , 16 );
@@ -743,6 +746,84 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
743746 }
744747}
745748
749+ static bool relaxable (ArrayRef<Relocation> relocs, size_t i) {
750+ return i + 1 < relocs.size () && relocs[i + 1 ].type == R_LARCH_RELAX;
751+ }
752+
753+ static bool isPairRelaxable (ArrayRef<Relocation> relocs, size_t i) {
754+ return relaxable (relocs, i) && relaxable (relocs, i + 2 ) &&
755+ relocs[i].offset + 4 == relocs[i + 2 ].offset ;
756+ }
757+
758+ // Relax code sequence.
759+ // From:
760+ // pcalau12i $a0, %pc_hi20(sym)
761+ // addi.w/d $a0, $a0, %pc_lo12(sym)
762+ // To:
763+ // pcaddi $a0, %pc_lo12(sym)
764+ //
765+ // From:
766+ // pcalau12i $a0, %got_pc_hi20(sym_got)
767+ // ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
768+ // To:
769+ // pcaddi $a0, %got_pc_hi20(sym_got)
770+ static void relaxPCHi20Lo12 (Ctx &ctx, const InputSection &sec, size_t i,
771+ uint64_t loc, Relocation &rHi20, Relocation &rLo12,
772+ uint32_t &remove) {
773+ // check if the relocations are relaxable sequences.
774+ if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
775+ rLo12.type == R_LARCH_PCALA_LO12) ||
776+ (rHi20.type == R_LARCH_GOT_PC_HI20 &&
777+ rLo12.type == R_LARCH_GOT_PC_LO12)))
778+ return ;
779+
780+ // GOT references to absolute symbols can't be relaxed to use pcaddi in
781+ // position-independent code, because these instructions produce a relative
782+ // address.
783+ // Meanwhile skip undefined, preemptible and STT_GNU_IFUNC symbols, because
784+ // these symbols may be resolve in runtime.
785+ if (rHi20.type == R_LARCH_GOT_PC_HI20 &&
786+ (!rHi20.sym ->isDefined () || rHi20.sym ->isPreemptible ||
787+ rHi20.sym ->isGnuIFunc () ||
788+ (ctx.arg .isPic && !cast<Defined>(*rHi20.sym ).section )))
789+ return ;
790+
791+ uint64_t symBase = 0 ;
792+ if (rHi20.expr == RE_LOONGARCH_PLT_PAGE_PC)
793+ symBase = rHi20.sym ->getPltVA (ctx);
794+ else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
795+ rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
796+ symBase = rHi20.sym ->getVA (ctx);
797+ else {
798+ Err (ctx) << getErrorLoc (ctx, (const uint8_t *)loc) << " unknown expr ("
799+ << rHi20.expr << " ) against symbol " << rHi20.sym
800+ << " in relaxPCHi20Lo12" ;
801+ return ;
802+ }
803+ const uint64_t symLocal = symBase + rHi20.addend ;
804+
805+ const int64_t distance = symLocal - loc;
806+ // Check if the distance aligns 4 bytes or exceeds the range of pcaddi.
807+ if ((distance & 0x3 ) != 0 || !isInt<22 >(distance))
808+ return ;
809+
810+ // Note: If we can ensure that the .o files generated by LLVM only contain
811+ // relaxable instruction sequences with R_LARCH_RELAX, then we do not need to
812+ // decode instructions. The relaxable instruction sequences imply the
813+ // following constraints:
814+ // * For relocation pairs related to got_pc, the opcodes of instructions
815+ // must be pcalau12i + ld.w/d. In other cases, the opcodes must be pcalau12i +
816+ // addi.w/d.
817+ // * The destination register of pcalau12i is guaranteed to be used only by
818+ // the immediately following instruction.
819+ const uint32_t nextInsn = read32le (sec.content ().data () + rLo12.offset );
820+
821+ sec.relaxAux ->relocTypes [i] = R_LARCH_RELAX;
822+ sec.relaxAux ->relocTypes [i + 2 ] = R_LARCH_PCREL20_S2;
823+ sec.relaxAux ->writes .push_back (insn (PCADDI, getD5 (nextInsn), 0 , 0 ));
824+ remove = 4 ;
825+ }
826+
746827static bool relax (Ctx &ctx, InputSection &sec) {
747828 const uint64_t secAddr = sec.getVA ();
748829 const MutableArrayRef<Relocation> relocs = sec.relocs ();
@@ -781,6 +862,12 @@ static bool relax(Ctx &ctx, InputSection &sec) {
781862 }
782863 break ;
783864 }
865+ case R_LARCH_PCALA_HI20:
866+ case R_LARCH_GOT_PC_HI20:
867+ // The overflow check for i+2 will be carried out in isPairRelaxable.
868+ if (isPairRelaxable (relocs, i))
869+ relaxPCHi20Lo12 (ctx, sec, i, loc, r, relocs[i + 2 ], remove);
870+ break ;
784871 }
785872
786873 // For all anchors whose offsets are <= r.offset, they are preceded by
@@ -851,6 +938,7 @@ void LoongArch::finalizeRelax(int passes) const {
851938 MutableArrayRef<Relocation> rels = sec->relocs ();
852939 ArrayRef<uint8_t > old = sec->content ();
853940 size_t newSize = old.size () - aux.relocDeltas [rels.size () - 1 ];
941+ size_t writesIdx = 0 ;
854942 uint8_t *p = ctx.bAlloc .Allocate <uint8_t >(newSize);
855943 uint64_t offset = 0 ;
856944 int64_t delta = 0 ;
@@ -867,11 +955,29 @@ void LoongArch::finalizeRelax(int passes) const {
867955 continue ;
868956
869957 // Copy from last location to the current relocated location.
870- const Relocation &r = rels[i];
958+ Relocation &r = rels[i];
871959 uint64_t size = r.offset - offset;
872960 memcpy (p, old.data () + offset, size);
873961 p += size;
874- offset = r.offset + remove;
962+
963+ int64_t skip = 0 ;
964+ if (RelType newType = aux.relocTypes [i]) {
965+ switch (newType) {
966+ case R_LARCH_RELAX:
967+ break ;
968+ case R_LARCH_PCREL20_S2:
969+ skip = 4 ;
970+ write32le (p, aux.writes [writesIdx++]);
971+ // RelExpr is needed for relocating.
972+ r.expr = r.sym ->hasFlag (NEEDS_PLT) ? R_PLT_PC : R_PC;
973+ break ;
974+ default :
975+ llvm_unreachable (" unsupported type" );
976+ }
977+ }
978+
979+ p += skip;
980+ offset = r.offset + skip + remove;
875981 }
876982 memcpy (p, old.data () + offset, old.size () - offset);
877983
0 commit comments