Skip to content

Commit 6f53f1c

Browse files
authored
[ELF] -r: Synthesize R_RISCV_ALIGN at input section start
Without linker relaxation enabled for a particular relocatable file or section (e.g., using `.option norelax`), the assembler will not generate R_RISCV_ALIGN relocations for alignment directives. This becomes problematic in a two-stage linking process:

```
ld -r a.o b.o -o ab.o   // b.o is norelax. Its alignment information is lost in ab.o.
ld ab.o -o ab
```

When ab.o is linked into an executable, the preceding relaxed section (a.o's content) might shrink. Since there is no R_RISCV_ALIGN relocation in b.o for the linker to act upon, the `.word 0x3a393837` data in b.o may end up unaligned in the final executable.

To address the issue, this patch inserts NOP bytes and synthesizes an R_RISCV_ALIGN relocation at the beginning of a text section when the alignment is >= 4.

For simplicity, when RVC is disabled, we synthesize an ALIGN relocation (addend: 2) for a 4-byte aligned section, allowing the linker to trim the excess 2 bytes.

See also https://sourceware.org/bugzilla/show_bug.cgi?id=33236

Pull Request: llvm#151639
1 parent 0c13988 commit 6f53f1c

File tree

5 files changed

+281
-4
lines changed

5 files changed

+281
-4
lines changed

lld/ELF/Arch/RISCV.cpp

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,21 @@ class RISCV final : public TargetInfo {
4545
uint64_t val) const override;
4646
void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
4747
bool relaxOnce(int pass) const override;
48+
template <class ELFT, class RelTy>
49+
bool synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
50+
Relocs<RelTy> rels);
51+
template <class ELFT, class RelTy>
52+
void finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
53+
Relocs<RelTy> rels);
54+
template <class ELFT>
55+
bool synthesizeAlignAux(uint64_t &dot, InputSection *sec);
56+
bool synthesizeAlign(uint64_t &dot, InputSection *sec) override;
4857
void finalizeRelax(int passes) const override;
58+
59+
// The following two variables are used by synthesized ALIGN relocations.
60+
InputSection *baseSec = nullptr;
61+
// r_offset and r_addend pairs.
62+
SmallVector<std::pair<uint64_t, uint64_t>, 0> synthesizedAligns;
4963
};
5064

5165
} // end anonymous namespace
@@ -956,6 +970,116 @@ bool RISCV::relaxOnce(int pass) const {
956970
return changed;
957971
}
958972

973+
// If the section alignment is >= 4, advance `dot` to insert NOPs and synthesize
974+
// an ALIGN relocation. Otherwise, return false to use default handling.
975+
template <class ELFT, class RelTy>
976+
bool RISCV::synthesizeAlignForInput(uint64_t &dot, InputSection *sec,
977+
Relocs<RelTy> rels) {
978+
if (!baseSec) {
979+
// Record the first input section with RELAX relocations. We will synthesize
980+
// ALIGN relocations here.
981+
for (auto rel : rels) {
982+
if (rel.getType(false) == R_RISCV_RELAX) {
983+
baseSec = sec;
984+
break;
985+
}
986+
}
987+
} else if (sec->addralign >= 4) {
988+
// If the alignment is >= 4 and the section does not start with an ALIGN
989+
// relocation, synthesize one.
990+
bool hasAlignRel = llvm::any_of(rels, [](const RelTy &rel) {
991+
return rel.r_offset == 0 && rel.getType(false) == R_RISCV_ALIGN;
992+
});
993+
if (!hasAlignRel) {
994+
synthesizedAligns.emplace_back(dot - baseSec->getVA(),
995+
sec->addralign - 2);
996+
dot += sec->addralign - 2;
997+
return true;
998+
}
999+
}
1000+
return false;
1001+
}
1002+
1003+
// Finalize the relocation section by appending synthesized ALIGN relocations
1004+
// after processing all input sections.
1005+
template <class ELFT, class RelTy>
1006+
void RISCV::finalizeSynthesizeAligns(uint64_t &dot, InputSection *sec,
1007+
Relocs<RelTy> rels) {
1008+
auto *f = cast<ObjFile<ELFT>>(baseSec->file);
1009+
auto shdr = f->template getELFShdrs<ELFT>()[baseSec->relSecIdx];
1010+
// Create a copy of InputSection.
1011+
sec = make<InputSection>(*f, shdr, baseSec->name);
1012+
auto *baseRelSec = cast<InputSection>(f->getSections()[baseSec->relSecIdx]);
1013+
*sec = *baseRelSec;
1014+
baseSec = nullptr;
1015+
1016+
// Allocate buffer for original and synthesized relocations in RELA format.
1017+
// If CREL is used, OutputSection::finalizeNonAllocCrel will convert RELA to
1018+
// CREL.
1019+
auto newSize = rels.size() + synthesizedAligns.size();
1020+
auto *relas = makeThreadLocalN<typename ELFT::Rela>(newSize);
1021+
sec->size = newSize * sizeof(typename ELFT::Rela);
1022+
sec->content_ = reinterpret_cast<uint8_t *>(relas);
1023+
sec->type = SHT_RELA;
1024+
// Copy original relocations to the new buffer, potentially converting CREL to
1025+
// RELA.
1026+
for (auto [i, r] : llvm::enumerate(rels)) {
1027+
relas[i].r_offset = r.r_offset;
1028+
relas[i].setSymbolAndType(r.getSymbol(0), r.getType(0), false);
1029+
if constexpr (RelTy::HasAddend)
1030+
relas[i].r_addend = r.r_addend;
1031+
}
1032+
// Append synthesized ALIGN relocations to the buffer.
1033+
for (auto [i, r] : llvm::enumerate(synthesizedAligns)) {
1034+
auto &rela = relas[rels.size() + i];
1035+
rela.r_offset = r.first;
1036+
rela.setSymbolAndType(0, R_RISCV_ALIGN, false);
1037+
rela.r_addend = r.second;
1038+
}
1039+
// Replace the old relocation section with the new one in the output section.
1040+
// addOrphanSections ensures that the output relocation section is processed
1041+
// after osec.
1042+
for (SectionCommand *cmd : sec->getParent()->commands) {
1043+
auto *isd = dyn_cast<InputSectionDescription>(cmd);
1044+
if (!isd)
1045+
continue;
1046+
for (auto *&isec : isd->sections)
1047+
if (isec == baseRelSec)
1048+
isec = sec;
1049+
}
1050+
}
1051+
1052+
template <class ELFT>
1053+
bool RISCV::synthesizeAlignAux(uint64_t &dot, InputSection *sec) {
1054+
bool ret = false;
1055+
if (sec) {
1056+
invokeOnRelocs(*sec, ret = synthesizeAlignForInput<ELFT>, dot, sec);
1057+
} else if (baseSec) {
1058+
invokeOnRelocs(*baseSec, finalizeSynthesizeAligns<ELFT>, dot, sec);
1059+
}
1060+
return ret;
1061+
}
1062+
1063+
// Without linker relaxation enabled for a particular relocatable file or
1064+
// section, the assembler will not generate R_RISCV_ALIGN relocations for
1065+
// alignment directives. This becomes problematic in a two-stage linking
1066+
// process: ld -r a.o b.o -o ab.o; ld ab.o -o ab. This function synthesizes an
1067+
// R_RISCV_ALIGN relocation at section start when needed.
1068+
//
1069+
// When called with an input section (`sec` is not null): If the section
1070+
// alignment is >= 4, advance `dot` to insert NOPs and synthesize an ALIGN
1071+
// relocation.
1072+
//
1073+
// When called after all input sections are processed (`sec` is null): The
1074+
// output relocation section is updated with all the newly synthesized ALIGN
1075+
// relocations.
1076+
bool RISCV::synthesizeAlign(uint64_t &dot, InputSection *sec) {
1077+
assert(ctx.arg.relocatable);
1078+
if (ctx.arg.is64)
1079+
return synthesizeAlignAux<ELF64LE>(dot, sec);
1080+
return synthesizeAlignAux<ELF32LE>(dot, sec);
1081+
}
1082+
9591083
void RISCV::finalizeRelax(int passes) const {
9601084
llvm::TimeTraceScope timeScope("Finalize RISC-V relaxation");
9611085
Log(ctx) << "relaxation passes: " << passes;

lld/ELF/LinkerScript.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1230,6 +1230,9 @@ bool LinkerScript::assignOffsets(OutputSection *sec) {
12301230
if (sec->firstInOverlay)
12311231
state->overlaySize = 0;
12321232

1233+
bool synthesizeAlign = ctx.arg.relocatable && ctx.arg.relax &&
1234+
(sec->flags & SHF_EXECINSTR) &&
1235+
ctx.arg.emachine == EM_RISCV;
12331236
// We visited SectionsCommands from processSectionCommands to
12341237
// layout sections. Now, we visit SectionsCommands again to fix
12351238
// section offsets.
@@ -1260,7 +1263,10 @@ bool LinkerScript::assignOffsets(OutputSection *sec) {
12601263
if (isa<PotentialSpillSection>(isec))
12611264
continue;
12621265
const uint64_t pos = dot;
1263-
dot = alignToPowerOf2(dot, isec->addralign);
1266+
// If synthesized ALIGN may be needed, call synthesizeAlign and
1267+
// disable the default handling if the return value is true.
1268+
if (!(synthesizeAlign && ctx.target->synthesizeAlign(dot, isec)))
1269+
dot = alignToPowerOf2(dot, isec->addralign);
12641270
isec->outSecOff = dot - sec->addr;
12651271
dot += isec->getSize();
12661272

@@ -1276,6 +1282,12 @@ bool LinkerScript::assignOffsets(OutputSection *sec) {
12761282
if (ctx.in.relroPadding && sec == ctx.in.relroPadding->getParent())
12771283
expandOutputSection(alignToPowerOf2(dot, ctx.arg.commonPageSize) - dot);
12781284

1285+
if (synthesizeAlign) {
1286+
const uint64_t pos = dot;
1287+
ctx.target->synthesizeAlign(dot, nullptr);
1288+
expandOutputSection(dot - pos);
1289+
}
1290+
12791291
// Non-SHF_ALLOC sections do not affect the addresses of other OutputSections
12801292
// as they are not part of the process image.
12811293
if (!(sec->flags & SHF_ALLOC)) {

lld/ELF/OutputSections.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -889,9 +889,17 @@ void OutputSection::sortInitFini() {
889889
std::array<uint8_t, 4> OutputSection::getFiller(Ctx &ctx) {
890890
if (filler)
891891
return *filler;
892-
if (flags & SHF_EXECINSTR)
893-
return ctx.target->trapInstr;
894-
return {0, 0, 0, 0};
892+
if (!(flags & SHF_EXECINSTR))
893+
return {0, 0, 0, 0};
894+
if (ctx.arg.relocatable && ctx.arg.emachine == EM_RISCV) {
895+
// See RISCV::synthesizeAlign: Synthesized NOP bytes and ALIGN
896+
// relocations might be needed between two input sections. Use a NOP for the
897+
// filler.
898+
if (ctx.arg.eflags & EF_RISCV_RVC)
899+
return {1, 0, 1, 0};
900+
return {0x13, 0, 0, 0};
901+
}
902+
return ctx.target->trapInstr;
895903
}
896904

897905
void OutputSection::checkDynRelAddends(Ctx &ctx) {

lld/ELF/Target.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,9 @@ class TargetInfo {
9696

9797
// Do a linker relaxation pass and return true if we changed something.
9898
virtual bool relaxOnce(int pass) const { return false; }
99+
// Target hook consulted while assigning offsets in a relocatable link. When
// called with an input section, a target may position the section itself by
// advancing `dot` and returning true, in which case the caller skips its
// default alignment of `dot`. It is also called with a null `sec` after the
// input sections of an output section have been visited. The default
// implementation does nothing and returns false.
virtual bool synthesizeAlign(uint64_t &dot, InputSection *sec) {
100+
return false;
101+
}
99102
// Do finalize relaxation after collecting relaxation infos.
100103
virtual void finalizeRelax(int passes) const {}
101104

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# RUN: rm -rf %t && split-file %s %t && cd %t
2+
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c,+relax a.s -o ac.o
3+
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c,+relax b.s -o bc.o
4+
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c,+relax b1.s -o b1c.o
5+
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c,+relax c.s -o cc.o
6+
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c d.s -o dc.o
7+
8+
## No RELAX. Don't synthesize ALIGN.
9+
# RUN: ld.lld -r bc.o dc.o -o bd.ro
10+
# RUN: llvm-readelf -r bd.ro | FileCheck %s --check-prefix=NOREL
11+
12+
# NOREL: no relocations
13+
14+
# RUN: ld.lld -r bc.o bc.o ac.o bc.o b1c.o cc.o dc.o -o out.ro
15+
# RUN: llvm-objdump -dr -M no-aliases out.ro | FileCheck %s
16+
17+
# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax a.s -o a.o
18+
# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax b.s -o b.o
19+
# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax d.s -o d.o
20+
# RUN: ld.lld -r a.o b.o d.o -o out0.ro
21+
# RUN: ld.lld -Ttext=0x10000 out0.ro -o out0
22+
# RUN: llvm-objdump -dr -M no-aliases out0 | FileCheck %s --check-prefix=CHECK1
23+
24+
# CHECK: <b0>:
25+
# CHECK-NEXT: 0: 00158513 addi a0, a1, 0x1
26+
# CHECK-NEXT: 4: 0001 c.nop
27+
# CHECK-NEXT: 6: 0001 c.nop
28+
# CHECK-EMPTY:
29+
# CHECK-NEXT: <b0>:
30+
# CHECK-NEXT: 8: 00158513 addi a0, a1, 0x1
31+
# CHECK-EMPTY:
32+
# CHECK-NEXT: <_start>:
33+
# CHECK-NEXT: c: 00000097 auipc ra, 0x0
34+
# CHECK-NEXT: 000000000000000c: R_RISCV_CALL_PLT foo
35+
# CHECK-NEXT: 000000000000000c: R_RISCV_RELAX *ABS*
36+
# CHECK-NEXT: 10: 000080e7 jalr ra, 0x0(ra) <_start>
37+
# CHECK-NEXT: 14: 0001 c.nop
38+
# CHECK-NEXT: 0000000000000014: R_RISCV_ALIGN *ABS*+0x6
39+
# CHECK-NEXT: 16: 0001 c.nop
40+
# CHECK-NEXT: 18: 0001 c.nop
41+
# CHECK-EMPTY:
42+
# CHECK-NEXT: <b0>:
43+
# CHECK-NEXT: 1a: 00158513 addi a0, a1, 0x1
44+
# CHECK-NEXT: 1e: 0001 c.nop
45+
# CHECK-NEXT: 20: 0001 c.nop
46+
# CHECK-NEXT: 0000000000000020: R_RISCV_ALIGN *ABS*+0x6
47+
# CHECK-NEXT: 22: 0001 c.nop
48+
# CHECK-NEXT: 24: 00000013 addi zero, zero, 0x0
49+
# CHECK-EMPTY:
50+
# CHECK-NEXT: <b0>:
51+
# CHECK-NEXT: 28: 00158513 addi a0, a1, 0x1
52+
# CHECK-EMPTY:
53+
# CHECK-NEXT: <c0>:
54+
# CHECK-NEXT: 2c: 00000097 auipc ra, 0x0
55+
# CHECK-NEXT: 000000000000002c: R_RISCV_CALL_PLT foo
56+
# CHECK-NEXT: 000000000000002c: R_RISCV_RELAX *ABS*
57+
# CHECK-NEXT: 30: 000080e7 jalr ra, 0x0(ra) <c0>
58+
# CHECK-NEXT: 34: 0001 c.nop
59+
# CHECK-NEXT: 0000000000000034: R_RISCV_ALIGN *ABS*+0x2
60+
# CHECK-EMPTY:
61+
# CHECK-NEXT: <d0>:
62+
# CHECK-NEXT: 36: 00258513 addi a0, a1, 0x2
63+
64+
# CHECK1: <_start>:
65+
# CHECK1-NEXT: 010000ef jal ra, 0x10010 <foo>
66+
# CHECK1-NEXT: 00000013 addi zero, zero, 0x0
67+
# CHECK1-EMPTY:
68+
# CHECK1-NEXT: <b0>:
69+
# CHECK1-NEXT: 00158513 addi a0, a1, 0x1
70+
# CHECK1-EMPTY:
71+
# CHECK1-NEXT: <d0>:
72+
# CHECK1-NEXT: 00258513 addi a0, a1, 0x2
73+
74+
## Test CREL.
75+
# RUN: llvm-mc -filetype=obj -triple=riscv64 -mattr=+c,+relax --crel a.s -o acrel.o
76+
# RUN: ld.lld -r acrel.o bc.o -o out1.ro
77+
# RUN: llvm-objdump -dr -M no-aliases out1.ro | FileCheck %s --check-prefix=CHECK2
78+
79+
# CHECK2: <_start>:
80+
# CHECK2-NEXT: 0: 00000097 auipc ra, 0x0
81+
# CHECK2-NEXT: 0000000000000000: R_RISCV_CALL_PLT foo
82+
# CHECK2-NEXT: 0000000000000000: R_RISCV_RELAX *ABS*
83+
# CHECK2-NEXT: 4: 000080e7 jalr ra, 0x0(ra) <_start>
84+
# CHECK2-NEXT: 8: 0001 c.nop
85+
# CHECK2-NEXT: 0000000000000008: R_RISCV_ALIGN *ABS*+0x6
86+
# CHECK2-NEXT: a: 0001 c.nop
87+
# CHECK2-NEXT: c: 0001 c.nop
88+
# CHECK2-EMPTY:
89+
# CHECK2-NEXT: <b0>:
90+
# CHECK2-NEXT: e: 00158513 addi a0, a1, 0x1
91+
92+
#--- a.s
93+
.globl _start
94+
_start:
95+
call foo
96+
97+
.section .text1,"ax"
98+
.globl foo
99+
foo:
100+
101+
#--- b.s
102+
## Needs synthesized ALIGN
103+
.option push
104+
.option norelax
105+
.balign 8
106+
b0:
107+
addi a0, a1, 1
108+
.option pop
109+
110+
#--- b1.s
111+
.option push
112+
.option norelax
113+
.reloc ., R_RISCV_ALIGN, 6
114+
addi x0, x0, 0
115+
c.nop
116+
.balign 8
117+
b0:
118+
addi a0, a1, 1
119+
.option pop
120+
121+
#--- c.s
122+
.balign 2
123+
c0:
124+
call foo
125+
126+
#--- d.s
127+
## Needs synthesized ALIGN
128+
.balign 4
129+
d0:
130+
addi a0, a1, 2

0 commit comments

Comments
 (0)