-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[lld][LoongArch] Relax R_LARCH_PCALA_HI20 and R_LARCH_PCALA_LO12 #112696
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
A pair of pcalau12i+addi.d relocated by R_LARCH_PCALA_HI20 and R_LARCH_PCALA_LO12 can be converted to pcaddi.
|
Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository. In which case you can instead tag reviewers by name in a comment by using @ followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "ping"ing the PR by adding a comment “Ping”. The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the LLVM GitHub User Guide. You can also ask questions in a comment on this PR, on the LLVM Discord or on the forums. |
|
@llvm/pr-subscribers-lld-elf @llvm/pr-subscribers-lld Author: None (ywgrit) Changes: @SixWeining @MQ-mengqing Full diff: https://github.com/llvm/llvm-project/pull/112696.diff 4 Files Affected:
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index 5923cda2298b4e..e7276c23d0ab88 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -22,6 +22,40 @@ using namespace lld;
using namespace lld::elf;
namespace {
+#define LARCH_GET_RD(insn) (insn & 0x1f)
+#define LARCH_GET_RJ(insn) ((insn >> 5) & 0x1f)
+#define LARCH_MK_ADDI_D 0xffc00000
+#define LARCH_OP_ADDI_D 0x02c00000
+#define LARCH_MK_PCADDI 0xfe000000
+#define LARCH_OP_PCADDI 0x18000000
+#define LARCH_MK_PCALAU12I 0xfe000000
+#define LARCH_OP_PCALAU12I 0x1a000000
+#define LARCH_MK_LD_D 0xffc00000
+#define LARCH_OP_LD_D 0x28c00000
+#define LARCH_MK_LD_W 0xffc00000
+#define LARCH_OP_LD_W 0x28800000
+#define LARCH_MK_LU12I_W 0xfe000000
+#define LARCH_OP_LU12I_W 0x14000000
+#define LARCH_MK_ORI 0xffc00000
+#define LARCH_OP_ORI 0x03800000
+#define LARCH_MK_B 0xfc000000
+#define LARCH_OP_B 0x50000000
+#define LARCH_MK_BL 0xfc000000
+#define LARCH_OP_BL 0x54000000
+#define LARCH_MK_JIRL 0xfc000000
+#define LARCH_OP_JIRL 0x4c000000
+#define LARCH_INSN_OPS(insn, op) ((insn & LARCH_MK_##op) == LARCH_OP_##op)
+#define LARCH_INSN_ADDI_D(insn) LARCH_INSN_OPS((insn), ADDI_D)
+#define LARCH_INSN_PCADDI(insn) LARCH_INSN_OPS((insn), PCADDI)
+#define LARCH_INSN_PCALAU12I(insn) LARCH_INSN_OPS((insn), PCALAU12I)
+#define LARCH_INSN_LD_D(insn) LARCH_INSN_OPS((insn), LD_D)
+#define LARCH_INSN_LD_W(insn) LARCH_INSN_OPS((insn), LD_W)
+#define LARCH_INSN_LU12I_W(insn) LARCH_INSN_OPS((insn), LU12I_W)
+#define LARCH_INSN_ORI(insn) LARCH_INSN_OPS((insn), ORI)
+#define LARCH_INSN_B(insn) LARCH_INSN_OPS((insn), B)
+#define LARCH_INSN_BL(insn) LARCH_INSN_OPS((insn), BL)
+#define LARCH_INSN_JIRL(insn) LARCH_INSN_OPS((insn), JIRL)
+
class LoongArch final : public TargetInfo {
public:
LoongArch(Ctx &);
@@ -38,11 +72,16 @@ class LoongArch final : public TargetInfo {
bool usesOnlyLowPageBits(RelType type) const override;
void relocate(uint8_t *loc, const Relocation &rel,
uint64_t val) const override;
+ void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
bool relaxOnce(int pass) const override;
void finalizeRelax(int passes) const override;
};
} // end anonymous namespace
+// The internal relocation number for local got relaxation which isn't part
+// of the psABI spec.
+#define INTERNAL_R_LARCH_PCALA_LO12 256
+
namespace {
enum Op {
SUB_W = 0x00110000,
@@ -63,6 +102,7 @@ enum Reg {
R_ZERO = 0,
R_RA = 1,
R_TP = 2,
+ R_A0 = 4,
R_T0 = 12,
R_T1 = 13,
R_T2 = 14,
@@ -744,6 +784,69 @@ void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
}
}
+static bool relaxable(ArrayRef<Relocation> relocs, size_t i) {
+ return i + 1 != relocs.size() && relocs[i + 1].type == R_LARCH_RELAX;
+}
+
+// Returns true if the two instructions corresponding to the i-th reloc
+// entry and the i+2-th reloc entry can be relaxed as a pair. For scenarios
+// with fewer than four reloc entries, e.g., R_LARCH_CALL36, this function
+// should not be used to make a judgment.
+static bool isPair(ArrayRef<Relocation> relocs, size_t i) {
+ return relaxable(relocs, i) && relaxable(relocs, i + 2) &&
+ relocs[i].offset + 4 == relocs[i + 2].offset;
+}
+
+void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
+ const unsigned bits = ctx.arg.is64 ? 64 : 32;
+ uint64_t secAddr = sec.getOutputSection()->addr;
+ if (auto *s = dyn_cast<InputSection>(&sec))
+ secAddr += s->outSecOff;
+ else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
+ secAddr += ehIn->getParent()->outSecOff;
+ const ArrayRef<Relocation> relocs = sec.relocs();
+ for (size_t i = 0, size = relocs.size(); i != size; ++i) {
+ const Relocation &rel = relocs[i];
+ uint8_t *loc = buf + rel.offset;
+ uint64_t val = SignExtend64(
+ sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset), bits);
+
+ switch (rel.expr) {
+ case R_RELAX_HINT:
+ continue;
+ default:
+ break;
+ }
+ relocate(loc, rel, val);
+ }
+}
+
+// Relax pcalau12i,addi.d => pcaddi.
+static void relaxPcalaAddi(const InputSection &sec, size_t i, uint64_t loc,
+ Relocation &r_hi, uint32_t &remove) {
+ const uint64_t symval =
+ (r_hi.expr == R_LOONGARCH_PLT_PAGE_PC ? r_hi.sym->getPltVA(ctx) : r_hi.sym->getVA()) + r_hi.addend;
+ const int64_t dist = symval - loc;
+ uint32_t pca = read32le(sec.content().data() + r_hi.offset);
+ uint32_t add = read32le(sec.content().data() + r_hi.offset + 4);
+ uint32_t rd = LARCH_GET_RD(pca);
+
+ if (!LARCH_INSN_ADDI_D(add)
+ // Is pcalau12i $rd + addi.d $rd, $rd?
+ || LARCH_GET_RD(add) != rd
+ || LARCH_GET_RJ(add) != rd
+ // 4 bytes align
+ || symval & 0x3
+ || !isInt<22>(dist))
+ return;
+
+ // remove the first insn
+ sec.relaxAux->relocTypes[i] = R_LARCH_RELAX;
+ sec.relaxAux->relocTypes[i + 2] = R_LARCH_PCREL20_S2;
+ sec.relaxAux->writes.push_back(LARCH_OP_PCADDI | rd); // pcaddi
+ remove = 4;
+}
+
static bool relax(Ctx &ctx, InputSection &sec) {
const uint64_t secAddr = sec.getVA();
const MutableArrayRef<Relocation> relocs = sec.relocs();
@@ -782,6 +885,11 @@ static bool relax(Ctx &ctx, InputSection &sec) {
}
break;
}
+ case R_LARCH_PCALA_HI20:
+ if (isPair(relocs, i)
+ && relocs[i + 2].type == R_LARCH_PCALA_LO12)
+ relaxPcalaAddi(sec, i, loc, r, remove);
+ break;
}
// For all anchors whose offsets are <= r.offset, they are preceded by
@@ -852,6 +960,7 @@ void LoongArch::finalizeRelax(int passes) const {
MutableArrayRef<Relocation> rels = sec->relocs();
ArrayRef<uint8_t> old = sec->content();
size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
+ size_t writesIdx = 0;
uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize);
uint64_t offset = 0;
int64_t delta = 0;
@@ -872,7 +981,24 @@ void LoongArch::finalizeRelax(int passes) const {
uint64_t size = r.offset - offset;
memcpy(p, old.data() + offset, size);
p += size;
- offset = r.offset + remove;
+
+ int64_t skip = 0;
+ if (r.type != R_LARCH_ALIGN) {
+ RelType newType = aux.relocTypes[i];
+ switch (newType) {
+ case R_LARCH_RELAX:
+ break;
+ case R_LARCH_PCREL20_S2:
+ skip = 4;
+ write32le(p, aux.writes[writesIdx++]);
+ break;
+ default:
+ llvm_unreachable("unsupported type");
+ }
+ }
+
+ p += skip;
+ offset = r.offset + remove + skip;
}
memcpy(p, old.data() + offset, old.size() - offset);
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
index 2e9e8a7007bbf8..b093a8790ef1a7 100644
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -737,6 +737,17 @@ uint64_t InputSectionBase::getRelocTargetVA(Ctx &ctx, const Relocation &r,
int64_t a = r.addend;
switch (r.expr) {
case R_ABS:
+ // pcalau12i,addi.d => pcaddi
+ // With relaxation applied, the relocation type of the third
+ // reloc entry which corresponds to the addi.d insn is converted
+ // from R_LARCH_PCALA_LO12 to R_LARCH_PCREL20_S2.
+ if (r.type == R_LARCH_PCREL20_S2) {
+ if (r.sym->hasFlag(NEEDS_PLT))
+ return r.sym->getPltVA(ctx) + a - p;
+ else
+ return r.sym->getVA(a) - p;
+ }
+ [[fallthrough]];
case R_DTPREL:
case R_RELAX_TLS_LD_TO_LE_ABS:
case R_RELAX_GOT_PC_NOPIC:
diff --git a/lld/test/ELF/loongarch-relax-align.s b/lld/test/ELF/loongarch-relax-align.s
index ab61e15d5caca2..9cbbc69b91c1cb 100644
--- a/lld/test/ELF/loongarch-relax-align.s
+++ b/lld/test/ELF/loongarch-relax-align.s
@@ -6,56 +6,170 @@
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o -o %t.64
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.32.o --no-relax -o %t.32n
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.64.o --no-relax -o %t.64n
-# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck %s
-# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck %s
-# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck %s
-# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck %s
+# RUN: llvm-objdump -td --no-show-raw-insn %t.32 | FileCheck %s --check-prefix=RELAX32
+# RUN: llvm-objdump -td --no-show-raw-insn %t.64 | FileCheck %s --check-prefixes=RELAX64,SRELAX64
+# RUN: llvm-objdump -td --no-show-raw-insn %t.32n | FileCheck %s --check-prefix=NORELAX
+# RUN: llvm-objdump -td --no-show-raw-insn %t.64n | FileCheck %s --check-prefix=NORELAX
## Test the R_LARCH_ALIGN without symbol index.
# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.o64.o --defsym=old=1
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o -o %t.o64
# RUN: ld.lld --section-start=.text=0x10000 --section-start=.text2=0x20000 -e 0 %t.o64.o --no-relax -o %t.o64n
-# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck %s
-# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck %s
+# RUN: llvm-objdump -td --no-show-raw-insn %t.o64 | FileCheck %s --check-prefixes=RELAX64,ORELAX64
+# RUN: llvm-objdump -td --no-show-raw-insn %t.o64n | FileCheck %s --check-prefix=ONORELAX
## -r keeps section contents unchanged.
# RUN: ld.lld -r %t.64.o -o %t.64.r
# RUN: llvm-objdump -dr --no-show-raw-insn %t.64.r | FileCheck %s --check-prefix=CHECKR
-# CHECK-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start
-# CHECK-DAG: {{0*}}10038 l .text {{0*}}0c .L1
-# CHECK-DAG: {{0*}}10040 l .text {{0*}}04 .L2
-# CHECK-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
-
-# CHECK: <.Ltext_start>:
-# CHECK-NEXT: break 1
-# CHECK-NEXT: break 2
-# CHECK-NEXT: nop
-# CHECK-NEXT: nop
-# CHECK-NEXT: break 3
-# CHECK-NEXT: break 4
-# CHECK-NEXT: nop
-# CHECK-NEXT: nop
-# CHECK-NEXT: pcalau12i $a0, 0
-# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0
-# CHECK-NEXT: pcalau12i $a0, 0
-# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 56
-# CHECK-NEXT: pcalau12i $a0, 0
-# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 64
-# CHECK-EMPTY:
-# CHECK-NEXT: <.L1>:
-# CHECK-NEXT: nop
-# CHECK-NEXT: nop
-# CHECK-EMPTY:
-# CHECK-NEXT: <.L2>:
-# CHECK-NEXT: break 5
-
-# CHECK: <.Ltext2_start>:
-# CHECK-NEXT: pcalau12i $a0, 0
-# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0
-# CHECK-NEXT: nop
-# CHECK-NEXT: nop
-# CHECK-NEXT: break 6
+# RELAX32-DAG: {{0*}}10000 l .text {{0*}}00 .Lalign_symbol
+# RELAX32-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start
+# RELAX32-DAG: {{0*}}10038 l .text {{0*}}0c .L1
+# RELAX32-DAG: {{0*}}10040 l .text {{0*}}04 .L2
+# RELAX32-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
+
+# RELAX32: <.Ltext_start>:
+# RELAX32-NEXT: break 1
+# RELAX32-NEXT: break 2
+# RELAX32-NEXT: nop
+# RELAX32-NEXT: nop
+# RELAX32-NEXT: break 3
+# RELAX32-NEXT: break 4
+# RELAX32-NEXT: nop
+# RELAX32-NEXT: nop
+# RELAX32-NEXT: pcalau12i $a0, 0
+# RELAX32-NEXT: addi.{{[dw]}} $a0, $a0, 0
+# RELAX32-NEXT: pcalau12i $a0, 0
+# RELAX32-NEXT: addi.{{[dw]}} $a0, $a0, 56
+# RELAX32-NEXT: pcalau12i $a0, 0
+# RELAX32-NEXT: addi.{{[dw]}} $a0, $a0, 64
+# RELAX32-EMPTY:
+# RELAX32-NEXT: <.L1>:
+# RELAX32-NEXT: nop
+# RELAX32-NEXT: nop
+# RELAX32-EMPTY:
+# RELAX32-NEXT: <.L2>:
+# RELAX32-NEXT: break 5
+
+# RELAX32: <.Ltext2_start>:
+# RELAX32-NEXT: pcalau12i $a0, 0
+# RELAX32-NEXT: addi.{{[dw]}} $a0, $a0, 0
+# RELAX32-NEXT: nop
+# RELAX32-NEXT: nop
+# RELAX32-NEXT: break 6
+
+# NORELAX-DAG: {{0*}}10000 l .text {{0*}}00 .Lalign_symbol
+# NORELAX-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start
+# NORELAX-DAG: {{0*}}10038 l .text {{0*}}0c .L1
+# NORELAX-DAG: {{0*}}10040 l .text {{0*}}04 .L2
+# NORELAX-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
+
+# NORELAX: <.Ltext_start>:
+# NORELAX-NEXT: break 1
+# NORELAX-NEXT: break 2
+# NORELAX-NEXT: nop
+# NORELAX-NEXT: nop
+# NORELAX-NEXT: break 3
+# NORELAX-NEXT: break 4
+# NORELAX-NEXT: nop
+# NORELAX-NEXT: nop
+# NORELAX-NEXT: pcalau12i $a0, 0
+# NORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 0
+# NORELAX-NEXT: pcalau12i $a0, 0
+# NORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 56
+# NORELAX-NEXT: pcalau12i $a0, 0
+# NORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 64
+# NORELAX-EMPTY:
+# NORELAX-NEXT: <.L1>:
+# NORELAX-NEXT: nop
+# NORELAX-NEXT: nop
+# NORELAX-EMPTY:
+# NORELAX-NEXT: <.L2>:
+# NORELAX-NEXT: break 5
+
+# NORELAX: <.Ltext2_start>:
+# NORELAX-NEXT: pcalau12i $a0, 0
+# NORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 0
+# NORELAX-NEXT: nop
+# NORELAX-NEXT: nop
+# NORELAX-NEXT: break 6
+
+
+
+# ORELAX64-DAG: {{0*}}00001 l *ABS* {{0*}}00 old
+# SRELAX64-DAG: {{0*}}10000 l .text {{0*}}00 .Lalign_symbol
+# RELAX64-DAG: {{0*}}10000 l .text {{0*}}34 .Ltext_start
+# RELAX64-DAG: {{0*}}1002c l .text {{0*}}08 .L1
+# RELAX64-DAG: {{0*}}10030 l .text {{0*}}04 .L2
+# RELAX64-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
+
+# RELAX64: <.Ltext_start>:
+# RELAX64-NEXT: break 1
+# RELAX64-NEXT: break 2
+# RELAX64-NEXT: nop
+# RELAX64-NEXT: nop
+# RELAX64-NEXT: break 3
+# RELAX64-NEXT: break 4
+# RELAX64-NEXT: nop
+# RELAX64-NEXT: nop
+# RELAX64-NEXT: pcaddi $a0, -8
+# RELAX64-NEXT: pcaddi $a0, 2
+# RELAX64-NEXT: pcaddi $a0, 2
+# RELAX64-EMPTY:
+# RELAX64-NEXT: <.L1>:
+# RELAX64-NEXT: nop
+# RELAX64-EMPTY:
+# RELAX64-NEXT: <.L2>:
+# RELAX64-NEXT: break 5
+
+# RELAX64: <.Ltext2_start>:
+# RELAX64-NEXT: pcaddi $a0, 0
+# RELAX64-NEXT: nop
+# RELAX64-NEXT: nop
+# RELAX64-NEXT: nop
+# RELAX64-NEXT: break 6
+
+
+# ONORELAX-DAG: {{0*}}00001 l *ABS* {{0*}}00 old
+# ONORELAX-DAG: {{0*}}10000 l .text {{0*}}44 .Ltext_start
+# ONORELAX-DAG: {{0*}}10038 l .text {{0*}}0c .L1
+# ONORELAX-DAG: {{0*}}10040 l .text {{0*}}04 .L2
+# ONORELAX-DAG: {{0*}}20000 l .text2 {{0*}}14 .Ltext2_start
+
+# ONORELAX: <.Ltext_start>:
+# ONORELAX-NEXT: break 1
+# ONORELAX-NEXT: break 2
+# ONORELAX-NEXT: nop
+# ONORELAX-NEXT: nop
+# ONORELAX-NEXT: break 3
+# ONORELAX-NEXT: break 4
+# ONORELAX-NEXT: nop
+# ONORELAX-NEXT: nop
+# ONORELAX-NEXT: pcalau12i $a0, 0
+# ONORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 0
+# ONORELAX-NEXT: pcalau12i $a0, 0
+# ONORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 56
+# ONORELAX-NEXT: pcalau12i $a0, 0
+# ONORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 64
+# ONORELAX-EMPTY:
+# ONORELAX-NEXT: <.L1>:
+# ONORELAX-NEXT: nop
+# ONORELAX-NEXT: nop
+# ONORELAX-EMPTY:
+# ONORELAX-NEXT: <.L2>:
+# ONORELAX-NEXT: break 5
+
+# ONORELAX: <.Ltext2_start>:
+# ONORELAX-NEXT: pcalau12i $a0, 0
+# ONORELAX-NEXT: addi.{{[dw]}} $a0, $a0, 0
+# ONORELAX-NEXT: nop
+# ONORELAX-NEXT: nop
+# ONORELAX-NEXT: break 6
+
+
+
+
+
# CHECKR: <.Ltext2_start>:
# CHECKR-NEXT: pcalau12i $a0, 0
diff --git a/lld/test/ELF/loongarch-relax-emit-relocs.s b/lld/test/ELF/loongarch-relax-emit-relocs.s
index ba414e8c93f0fb..8784bf57e7ff33 100644
--- a/lld/test/ELF/loongarch-relax-emit-relocs.s
+++ b/lld/test/ELF/loongarch-relax-emit-relocs.s
@@ -5,42 +5,65 @@
# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.64.o
# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.32.o -o %t.32
# RUN: ld.lld -Ttext=0x10000 --emit-relocs %t.64.o -o %t.64
-# RUN: llvm-objdump -dr %t.32 | FileCheck %s
-# RUN: llvm-objdump -dr %t.64 | FileCheck %s
+# RUN: llvm-objdump -dr %t.32 | FileCheck %s --check-prefix=LA32RELOC
+# RUN: llvm-objdump -dr %t.64 | FileCheck %s --check-prefix=LA64RELOC
## -r should keep original relocations.
# RUN: ld.lld -r %t.64.o -o %t.64.r
-# RUN: llvm-objdump -dr %t.64.r | FileCheck %s --check-prefix=CHECKR
+# RUN: llvm-objdump -dr %t.64.r | FileCheck %s --check-prefix=RELAX
-## --no-relax should keep original relocations.
-## TODO Due to R_LARCH_RELAX is not relaxed, it plays same as --relax now.
# RUN: ld.lld -Ttext=0x10000 --emit-relocs --no-relax %t.64.o -o %t.64.norelax
-# RUN: llvm-objdump -dr %t.64.norelax | FileCheck %s
-
-# CHECK: 00010000 <_start>:
-# CHECK-NEXT: pcalau12i $a0, 0
-# CHECK-NEXT: R_LARCH_PCALA_HI20 _start
-# CHECK-NEXT: R_LARCH_RELAX *ABS*
-# CHECK-NEXT: addi.{{[dw]}} $a0, $a0, 0
-# CHECK-NEXT: R_LARCH_PCALA_LO12 _start
-# CHECK-NEXT: R_LARCH_RELAX *ABS*
-# CHECK-NEXT: nop
-# CHECK-NEXT: R_LARCH_ALIGN *ABS*+0xc
-# CHECK-NEXT: nop
-# CHECK-NEXT: ret
-
-# CHECKR: <_start>:
-# CHECKR-NEXT: pcalau12i $a0, 0
-# CHECKR-NEXT: R_LARCH_PCALA_HI20 _start
-# CHECKR-NEXT: R_LARCH_RELAX *ABS*
-# CHECKR-NEXT: addi.d $a0, $a0, 0
-# CHECKR-NEXT: R_LARCH_PCALA_LO12 _start
-# CHECKR-NEXT: R_LARCH_RELAX *ABS*
-# CHECKR-NEXT: nop
-# CHECKR-NEXT: R_LARCH_ALIGN *ABS*+0xc
-# CHECKR-NEXT: nop
-# CHECKR-NEXT: nop
-# CHECKR-NEXT: ret
+# RUN: llvm-objdump -dr %t.64.norelax | FileCheck %s --check-prefix=NORELAX
+
+# LA32RELOC: 00010000 <_start>:
+# LA32RELOC-NEXT: pcalau12i $a0, 0
+# LA32RELOC-NEXT: R_LARCH_PCALA_HI20 _start
+# LA32RELOC-NEXT: R_LARCH_RELAX *ABS*
+# LA32RELOC-NEXT: addi.{{[dw]}} $a0, $a0, 0
+# LA32RELOC-NEXT: R_LARCH_PCALA_LO12 _start
+# LA32RELOC-NEXT: R_LARCH_RELAX *ABS*
+# LA32RELOC-NEXT: nop
+# LA32RELOC-NEXT: R_LARCH_ALIGN *ABS*+0xc
+# LA32RELOC-NEXT: nop
+# LA32RELOC-NEXT: ret
+
+# LA64RELOC: 00010000 <_start>:
+# LA64RELOC-NEXT: pcaddi $a0, 0
+# LA64RELOC-NEXT: R_LARCH_RELAX _start
+# LA64RELOC-NEXT: R_LARCH_RELAX *ABS*
+# LA64RELOC-NEXT: R_LARCH_PCREL20_S2 _start
+# LA64RELOC-NEXT: R_LARCH_RELAX *ABS*
+# LA64RELOC-NEXT: nop
+# LA64RELOC-NEXT: R_LARCH_ALIGN *ABS*+0xc
+# LA64RELOC-NEXT: nop
+# LA64RELOC-NEXT: nop
+# LA64RELOC-NEXT: ret
+
+
+# RELAX: <_start>:
+# RELAX-NEXT: pcalau12i $a0, 0
+# RELAX-NEXT: R_LARCH_PCALA_HI20 _start
+# RELAX-NEXT: R_LARCH_RELAX *ABS*
+# RELAX-NEXT: addi.d $a0, $a0, 0
+# RELAX-NEXT: R_LARCH_PCALA_LO12 _start
+# RELAX-NEXT: R_LARCH_RELAX *ABS*
+# RELAX-NEXT: nop
+# RELAX-NEXT: R_LARCH_ALIGN *ABS*+0xc
+# RELAX-NEXT: nop
+# RELAX-NEXT: nop
+# RELAX-NEXT: ret
+
+# NORELAX: <_start>:
+# NORELAX-NEXT: pcalau12i $a0, 0
+# NORELAX-NEXT: R_LARCH_PCALA_HI20 _start
+# NORELAX-NEXT: R_LARCH_RELAX *ABS*
+# NORELAX-NEXT: addi.d $a0, $a0, 0
+# NORELAX-NEXT: R_LARCH_PCALA_LO12 _start
+# NORELAX-NEXT: R_LARCH_RELAX *ABS*
+# NORELAX-NEXT: nop
+# NORELAX-NEXT: R_LARCH_ALIGN *ABS*+0xc
+# NORELAX-NEXT: nop
+# NORELAX-NEXT: ret
.global _start
_start:
|
You can test this locally with the following command:
git-clang-format --diff 1b4a173fa41e02eddec9f1cf41324aa4ea8a7fa5 8f9e9c8e264b72ab161e2963da1bc82059d5a551 --extensions cpp -- lld/ELF/Arch/LoongArch.cpp lld/ELF/InputSection.cpp
View the diff from clang-format here.
diff --git a/lld/ELF/Arch/LoongArch.cpp b/lld/ELF/Arch/LoongArch.cpp
index e7276c23d0..ca3a7fe010 100644
--- a/lld/ELF/Arch/LoongArch.cpp
+++ b/lld/ELF/Arch/LoongArch.cpp
@@ -823,9 +823,11 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
// Relax pcalau12i,addi.d => pcaddi.
static void relaxPcalaAddi(const InputSection &sec, size_t i, uint64_t loc,
- Relocation &r_hi, uint32_t &remove) {
+ Relocation &r_hi, uint32_t &remove) {
const uint64_t symval =
- (r_hi.expr == R_LOONGARCH_PLT_PAGE_PC ? r_hi.sym->getPltVA(ctx) : r_hi.sym->getVA()) + r_hi.addend;
+ (r_hi.expr == R_LOONGARCH_PLT_PAGE_PC ? r_hi.sym->getPltVA(ctx)
+ : r_hi.sym->getVA()) +
+ r_hi.addend;
const int64_t dist = symval - loc;
uint32_t pca = read32le(sec.content().data() + r_hi.offset);
uint32_t add = read32le(sec.content().data() + r_hi.offset + 4);
@@ -833,11 +835,10 @@ static void relaxPcalaAddi(const InputSection &sec, size_t i, uint64_t loc,
if (!LARCH_INSN_ADDI_D(add)
// Is pcalau12i $rd + addi.d $rd, $rd?
- || LARCH_GET_RD(add) != rd
- || LARCH_GET_RJ(add) != rd
+ || LARCH_GET_RD(add) != rd ||
+ LARCH_GET_RJ(add) != rd
// 4 bytes align
- || symval & 0x3
- || !isInt<22>(dist))
+ || symval & 0x3 || !isInt<22>(dist))
return;
// remove the first insn
@@ -886,8 +887,7 @@ static bool relax(Ctx &ctx, InputSection &sec) {
break;
}
case R_LARCH_PCALA_HI20:
- if (isPair(relocs, i)
- && relocs[i + 2].type == R_LARCH_PCALA_LO12)
+ if (isPair(relocs, i) && relocs[i + 2].type == R_LARCH_PCALA_LO12)
relaxPcalaAddi(sec, i, loc, r, remove);
break;
}
|
|
All commits in one PR will be squashed and merged, so you'd better rewrite the commit message in the first commit. |
lld/ELF/Arch/LoongArch.cpp
Outdated
| namespace { | ||
| #define LARCH_GET_RD(insn) (insn & 0x1f) | ||
| #define LARCH_GET_RJ(insn) ((insn >> 5) & 0x1f) | ||
| #define LARCH_MK_ADDI_D 0xffc00000 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's better to define only the functions that you need in this patch series, which avoids redundant review. Also, keep the same style as set{D5k16, D10k16, ...}, which I think makes the code cleaner. (For the meaning of "D" and "K", you can refer to https://areweloongyet.com/asmdb, or ask Xuerui.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's better to define only the functions that you need in this patch series, which avoids redundant review. Also, keep the same style as set{D5k16, D10k16, ...}, which I think makes the code cleaner. (For the meaning of "D" and "K", you can refer to https://areweloongyet.com/asmdb, or ask Xuerui.)
Thanks, it's important to ensure code consistency. In addition to this, would it be better to put the macro definitions in a separate PR?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As this PR adds a new feature, simply add what you need.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use functions rather than macros.
lld/ELF/Arch/LoongArch.cpp
Outdated
| } | ||
|
|
||
| static bool relaxable(ArrayRef<Relocation> relocs, size_t i) { | ||
| return i + 1 != relocs.size() && relocs[i + 1].type == R_LARCH_RELAX; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i + 1 ('<=' vs '!=' ?) relocs.size()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i + 1 ('<=' vs '!=' ?) relocs.size()
In the scenario here, it seems that i + 1 < relocs.size() and i + 1 != relocs.size() are equivalent?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should be equivalent to "<" in many scenarios. The "<=" was a mistake; please ignore it.
| break; | ||
| } | ||
| case R_LARCH_PCALA_HI20: | ||
| if (isPair(relocs, i) && relocs[i + 2].type == R_LARCH_PCALA_LO12) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i+2 causes overflow?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
i+2 causes overflow?
isPair will check this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
R_LARCH_PCALA_HI20 <- i
R_LARCH_RELAX <- i + 1 == reloc.size() - 1
END
Checking relaxable(reloc, i) is OK.
When checking relaxable(reloc, i + 2), since "i + 2 + 1" != reloc.size() is true, may it overflow?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
R_LARCH_PCALA_HI20 <- i R_LARCH_RELAX <- i + 1 == reloc.size() - 1 END Check relaxable(reloc, i) is OK. When check relaxable(reloc, i + 2), due to the "i + 2 + 1" != reloc.size() is true, may it overflow?
It seems that in the isPair function, it would be better to change i + 1 != relocs.size() to i + 1 < relocs.size().
lld/ELF/Arch/LoongArch.cpp
Outdated
| #define LARCH_GET_RD(insn) (insn & 0x1f) | ||
| #define LARCH_GET_RJ(insn) ((insn >> 5) & 0x1f) | ||
| #define LARCH_MK_ADDI_D 0xffc00000 | ||
| #define LARCH_OP_ADDI_D 0x02c00000 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we reuse the values in below enum (i.e. line 93)?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we reuse the values in below enum (i.e. line 93)?
Actually, I'm thinking if we could remove the enum values of Op and use the newly defined macros for all operations about instruction opcode? Because we may need to use the mask of instruction sometimes.
|
Any progress? |
SixWeining
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Basically it looks good to me. But I'd like to know how did you test the relaxation functionality? For example, use gcc(with relax enabled) + lld to build some programs?
@MQ-mengqing, do you have any further comments?
lld/ELF/Arch/LoongArch.cpp
Outdated
| R_ZERO = 0, | ||
| R_RA = 1, | ||
| R_TP = 2, | ||
| R_A0 = 4, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems it is not used by current patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Seems it is not used by current patch.
Yes, thanks for reminding me.
Because the code of this pr is split from a complete relaxation patch, there may be partially unused code. I will check the code of this pr again as soon as possible.
Yes. Here's the details of my testing process.
For performance reference |
MQ-mengqing
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As this commit only supports relaxing 'pcalau12i+addi.x' to 'pcaddi', a single feature, I think it works well given these test results.
LGTM.
lld/ELF/Arch/LoongArch.cpp
Outdated
| namespace { | ||
| #define LARCH_GET_RD(insn) (insn & 0x1f) | ||
| #define LARCH_GET_RJ(insn) ((insn >> 5) & 0x1f) | ||
| #define LARCH_MK_ADDI_D 0xffc00000 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use functions rather than macros.
MaskRay
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please don't land the patch. Non-trivial patches to lld/ELF require approval from maintainers.
lld/ELF/InputSection.cpp
Outdated
| // With relaxation applied, the relocation type of the third | ||
| // reloc entry which corresponds to the addi.[wd] insn is converted | ||
| // from R_LARCH_PCALA_LO12 to R_LARCH_PCREL20_S2. | ||
| if (r.type == R_LARCH_PCREL20_S2) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This adds overhead to R_ABS (numerous) for other targets. Should be avoided by using a new RelExpr for LoongArch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not just overhead, it's not checking e_machine so is true for any target that happens to have an absolute relocation with encoding 103. None exist in-tree, but this is still broken code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In fact, when I originally wrote the code here, the idea was to align it with the code I just pushed, i.e. adjust the RelExpr after relaxation. But we can't get the new RelExpr via the getRelExpr function because functions like fromPlt/toPlt may need to change the RelExpr, so we have to assign the RelExpr manually, which is not very standardized in my opinion.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| // LoongArch instructions could be divided into a number of kinds based on | ||
| // the width of imm and the number of registers. get_rd/get_rj only applies | ||
| // to those kinds of instructions that could do relocation. | ||
| static uint8_t get_rd(uint32_t insn) { return insn & 0x1f; } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
functionCase
This commit contains the following changes.
R_LARCH_PCALA_LO12 could be converted to pcaddi, which
improves performance by reducing the number of instructions.