Skip to content

Commit ea9fea2

Browse files
committed
Relax PCHi20Lo12.
Support relaxation optimization for two types of code sequences.

```
From:
  pcalau12i $a0, %pc_hi20(sym)               R_LARCH_PCALA_HI20, R_LARCH_RELAX
  addi.w/d  $a0, $a0, %pc_lo12(sym)          R_LARCH_PCALA_LO12, R_LARCH_RELAX
To:
  pcaddi    $a0, %pc_lo12(sym)               R_LARCH_PCREL20_S2

From:
  pcalau12i $a0, %got_pc_hi20(sym_got)       R_LARCH_GOT_PC_HI20, R_LARCH_RELAX
  ld.w/d    $a0, $a0, %got_pc_lo12(sym_got)  R_LARCH_GOT_PC_LO12, R_LARCH_RELAX
To:
  pcaddi    $a0, %got_pc_hi20(sym_got)       R_LARCH_PCREL20_S2
```
1 parent 0288d06 commit ea9fea2

File tree

1 file changed

+108
-2
lines changed

1 file changed

+108
-2
lines changed

lld/ELF/Arch/LoongArch.cpp

Lines changed: 108 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ enum Op {
5353
ADDI_W = 0x02800000,
5454
ADDI_D = 0x02c00000,
5555
ANDI = 0x03400000,
56+
PCADDI = 0x18000000,
5657
PCADDU12I = 0x1c000000,
5758
LD_W = 0x28800000,
5859
LD_D = 0x28c00000,
@@ -131,6 +132,8 @@ static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
131132
return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
132133
}
133134

135+
// Returns bits [4:0] of `v`, the 5-bit field this file refers to as "D5".
static uint32_t getD5(uint64_t v) {
  const uint32_t highBit = 4;
  const uint32_t lowBit = 0;
  return extractBits(v, highBit, lowBit);
}
136+
134137
static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
135138
uint32_t immLo = extractBits(imm, 15, 0);
136139
uint32_t immHi = extractBits(imm, 20, 16);
@@ -743,6 +746,84 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
743746
}
744747
}
745748

749+
// Returns true when relocation `i` is immediately followed by an
// R_LARCH_RELAX relocation. Returns false if there is no following entry.
static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
  const size_t next = i + 1;
  if (next >= relocs.size())
    return false;
  return relocs[next].type == R_LARCH_RELAX;
}
752+
753+
// Returns true when relocations `i` and `i + 2` are each followed by an
// R_LARCH_RELAX relocation and apply to two consecutive 4-byte slots
// (offset of `i + 2` equals offset of `i` plus 4).
static bool isPairRelaxable(ArrayRef<Relocation> relocs, size_t i) {
  // Both halves of the pair must carry their own R_LARCH_RELAX marker. The
  // second call also guarantees that index i + 2 is in bounds before it is
  // dereferenced below.
  if (!relaxable(relocs, i))
    return false;
  if (!relaxable(relocs, i + 2))
    return false;

  const auto &first = relocs[i];
  const auto &second = relocs[i + 2];
  return first.offset + 4 == second.offset;
}
757+
758+
// Relax code sequence.
759+
// From:
760+
// pcalau12i $a0, %pc_hi20(sym)
761+
// addi.w/d $a0, $a0, %pc_lo12(sym)
762+
// To:
763+
// pcaddi $a0, %pc_lo12(sym)
764+
//
765+
// From:
766+
// pcalau12i $a0, %got_pc_hi20(sym_got)
767+
// ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
768+
// To:
769+
// pcaddi $a0, %got_pc_hi20(sym_got)
770+
static void relaxPCHi20Lo12(Ctx &ctx, const InputSection &sec, size_t i,
771+
uint64_t loc, Relocation &rHi20, Relocation &rLo12,
772+
uint32_t &remove) {
773+
// check if the relocations are relaxable sequences.
774+
if (!((rHi20.type == R_LARCH_PCALA_HI20 &&
775+
rLo12.type == R_LARCH_PCALA_LO12) ||
776+
(rHi20.type == R_LARCH_GOT_PC_HI20 &&
777+
rLo12.type == R_LARCH_GOT_PC_LO12)))
778+
return;
779+
780+
// GOT references to absolute symbols can't be relaxed to use pcaddi in
781+
// position-independent code, because these instructions produce a relative
782+
// address.
783+
// Meanwhile skip undefined, preemptible and STT_GNU_IFUNC symbols, because
784+
// these symbols may be resolve in runtime.
785+
if (rHi20.type == R_LARCH_GOT_PC_HI20 &&
786+
(!rHi20.sym->isDefined() || rHi20.sym->isPreemptible ||
787+
rHi20.sym->isGnuIFunc() ||
788+
(ctx.arg.isPic && !cast<Defined>(*rHi20.sym).section)))
789+
return;
790+
791+
uint64_t symBase = 0;
792+
if (rHi20.expr == RE_LOONGARCH_PLT_PAGE_PC)
793+
symBase = rHi20.sym->getPltVA(ctx);
794+
else if (rHi20.expr == RE_LOONGARCH_PAGE_PC ||
795+
rHi20.expr == RE_LOONGARCH_GOT_PAGE_PC)
796+
symBase = rHi20.sym->getVA(ctx);
797+
else {
798+
Err(ctx) << getErrorLoc(ctx, (const uint8_t *)loc) << "unknown expr ("
799+
<< rHi20.expr << ") against symbol " << rHi20.sym
800+
<< "in relaxPCHi20Lo12";
801+
return;
802+
}
803+
const uint64_t symLocal = symBase + rHi20.addend;
804+
805+
const int64_t distance = symLocal - loc;
806+
// Check if the distance aligns 4 bytes or exceeds the range of pcaddi.
807+
if ((distance & 0x3) != 0 || !isInt<22>(distance))
808+
return;
809+
810+
// Note: If we can ensure that the .o files generated by LLVM only contain
811+
// relaxable instruction sequences with R_LARCH_RELAX, then we do not need to
812+
// decode instructions. The relaxable instruction sequences imply the
813+
// following constraints:
814+
// * For relocation pairs related to got_pc, the opcodes of instructions
815+
// must be pcalau12i + ld.w/d. In other cases, the opcodes must be pcalau12i +
816+
// addi.w/d.
817+
// * The destination register of pcalau12i is guaranteed to be used only by
818+
// the immediately following instruction.
819+
const uint32_t nextInsn = read32le(sec.content().data() + rLo12.offset);
820+
821+
sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
822+
sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
823+
sec.relaxAux->writes.push_back(insn(PCADDI, getD5(nextInsn), 0, 0));
824+
remove = 4;
825+
}
826+
746827
static bool relax(Ctx &ctx, InputSection &sec) {
747828
const uint64_t secAddr = sec.getVA();
748829
const MutableArrayRef<Relocation> relocs = sec.relocs();
@@ -781,6 +862,12 @@ static bool relax(Ctx &ctx, InputSection &sec) {
781862
}
782863
break;
783864
}
865+
case R_LARCH_PCALA_HI20:
866+
case R_LARCH_GOT_PC_HI20:
867+
// The overflow check for i+2 will be carried out in isPairRelaxable.
868+
if (isPairRelaxable(relocs, i))
869+
relaxPCHi20Lo12(ctx, sec, i, loc, r, relocs[i + 2], remove);
870+
break;
784871
}
785872

786873
// For all anchors whose offsets are <= r.offset, they are preceded by
@@ -851,6 +938,7 @@ void LoongArch::finalizeRelax(int passes) const {
851938
MutableArrayRef<Relocation> rels = sec->relocs();
852939
ArrayRef<uint8_t> old = sec->content();
853940
size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
941+
size_t writesIdx = 0;
854942
uint8_t *p = ctx.bAlloc.Allocate<uint8_t>(newSize);
855943
uint64_t offset = 0;
856944
int64_t delta = 0;
@@ -867,11 +955,29 @@ void LoongArch::finalizeRelax(int passes) const {
867955
continue;
868956

869957
// Copy from last location to the current relocated location.
870-
const Relocation &r = rels[i];
958+
Relocation &r = rels[i];
871959
uint64_t size = r.offset - offset;
872960
memcpy(p, old.data() + offset, size);
873961
p += size;
874-
offset = r.offset + remove;
962+
963+
int64_t skip = 0;
964+
if (RelType newType = aux.relocTypes[i]) {
965+
switch (newType) {
966+
case R_LARCH_RELAX:
967+
break;
968+
case R_LARCH_PCREL20_S2:
969+
skip = 4;
970+
write32le(p, aux.writes[writesIdx++]);
971+
// RelExpr is needed for relocating.
972+
r.expr = r.sym->hasFlag(NEEDS_PLT) ? R_PLT_PC : R_PC;
973+
break;
974+
default:
975+
llvm_unreachable("unsupported type");
976+
}
977+
}
978+
979+
p += skip;
980+
offset = r.offset + skip + remove;
875981
}
876982
memcpy(p, old.data() + offset, old.size() - offset);
877983

0 commit comments

Comments
 (0)