Skip to content
1 change: 1 addition & 0 deletions lld/ELF/Config.h
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ struct Config {
bool zIfuncNoplt;
bool zInitfirst;
bool zInterpose;
bool zKeepDataSectionPrefix;
bool zKeepTextSectionPrefix;
bool zLrodataAfterBss;
bool zNoBtCfi;
Expand Down
2 changes: 2 additions & 0 deletions lld/ELF/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1564,6 +1564,8 @@ static void readConfigs(Ctx &ctx, opt::InputArgList &args) {
ctx.arg.zIfuncNoplt = hasZOption(args, "ifunc-noplt");
ctx.arg.zInitfirst = hasZOption(args, "initfirst");
ctx.arg.zInterpose = hasZOption(args, "interpose");
ctx.arg.zKeepDataSectionPrefix = getZFlag(
args, "keep-data-section-prefix", "nokeep-data-section-prefix", false);
ctx.arg.zKeepTextSectionPrefix = getZFlag(
args, "keep-text-section-prefix", "nokeep-text-section-prefix", false);
ctx.arg.zLrodataAfterBss =
Expand Down
47 changes: 40 additions & 7 deletions lld/ELF/LinkerScript.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ static bool isSectionPrefix(StringRef prefix, StringRef name) {
return name.consume_front(prefix) && (name.empty() || name[0] == '.');
}

static bool isSectionSuffix(StringRef suffix, StringRef name) {
name.consume_back(".");
return name.ends_with(suffix);
}

StringRef LinkerScript::getOutputSectionName(const InputSectionBase *s) const {
// This is for --emit-relocs and -r. If .text.foo is emitted as .text.bar, we
// want to emit .rela.text.foo as .rela.text.bar for consistency (this is not
Expand Down Expand Up @@ -105,13 +110,41 @@ StringRef LinkerScript::getOutputSectionName(const InputSectionBase *s) const {
return ".text";
}

for (StringRef v : {".data.rel.ro", ".data", ".rodata",
".bss.rel.ro", ".bss", ".ldata",
".lrodata", ".lbss", ".gcc_except_table",
".init_array", ".fini_array", ".tbss",
".tdata", ".ARM.exidx", ".ARM.extab",
".ctors", ".dtors", ".sbss",
".sdata", ".srodata"})
// When zKeepDataSectionPrefix is true, keep .hot and .unlikely suffixes
// in data sections.
static constexpr StringRef dataSectionPrefixes[] = {
".data.rel.ro", ".data", ".rodata", ".bss.rel.ro", ".bss",
};

for (auto [index, v] : llvm::enumerate(dataSectionPrefixes)) {
if (isSectionPrefix(v, s->name)) {
// The .bss.rel.ro section is considered rarely accessed. So this section
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make if (isSectionPrefix(v, s->name)) { early return.

.bss.rel.ro is currently only used by LLD's copy relocation feature, not generated by compilers. It's unnecessary to custom stuff with index != 3. Just remove the condition.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Make if (isSectionPrefix(v, s->name)) { early return.

Just to confirm, is the early return primarily aimed at optimizing the path where zKeepDataSectionPrefix is false? I'll go ahead and implement it that way.

.bss.rel.ro is currently only used by LLD's copy relocation feature, not generated by compilers. It's unnecessary to custom stuff with index != 3. Just remove the condition.

Thanks for the information. Removed 'index = 3' stuff, and updated lld/test/ELF/data-section-prefix.s accordingly to have .bss.rel.ro without any prefix.

// is not partitioned and zKeepDataSectionPrefix is not applied to
// make the executable simpler with fewer elf sections.
if (ctx.arg.zKeepDataSectionPrefix && index != 3) {
if (isSectionPrefix(".hot", s->name.substr(v.size())))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If StringRef name = s->name at the loop body beginning and use consume_front, we can remove substr(v.size()) here and below

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Incorporate this by adding a helper function trimSectionPrefix. Since this function is only called once, I'm open to remove trimSectionPrefix and inline its content at the callsite.

return s->name.substr(0, v.size() + 4);
if (isSectionPrefix(".unlikely", s->name.substr(v.size())))
return s->name.substr(0, v.size() + 9);
// For .rodata, a section could be`.rodata.cst<N>.hot.` for constant
// pool or `rodata.str<N>.hot.` for string literals.
if (index == 2) {
if (isSectionSuffix(".hot", s->name)) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isSectionSuffix supports both .rodata.hot and .rodata.hot.. I don't think we want to support .rodata.hot (which can be used by a function named hot). Supporting just .rodata.hot.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't think we want to support .rodata.hot (which can be used by a function named hot). Supporting just .rodata.hot.

sure. I think it's sensible to support only rodata.hot. (with trailing dot) to disambiguate functions and data.

return ".rodata.hot";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delete braces for single-line simple body. Is it necessary to make the feature specific to .rodata? While I understand that it probably has not been extended to .data, we are adding an option with a quite generic name, can we include these other sections as well?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

delete braces for single-line simple body.

done

Is it necessary to make the feature specific to .rodata? While I understand that it probably has not been extended to .data, we are adding an option with a quite generic name, can we include these other sections as well?

This is a good question. The reason for if (index == 2) { (the whole if-block in [1]) is to have simpler code by spelling out .rodata.hot string literal when we cannot return a substring as a StringRef. I added a code comment for this.

To elaborate

  1. For majority cases, the section name has one of the three prefixes (.<section>.hot, .<section>.unlikely or .<section>). In those cases, the returned StringRef is a substring of s->name. This works well, without the need to separate if-else branches for different data sections.
  2. The isSectionPrefix pattern above cannot be easily extended to return StringRef as a substring of .str<N> or .cst<N> come in between, and specifically we cannot get StringRef from .rodata.str.hot.` .

[1]

if (index == 2) {
        if (s->name.ends_with(".hot."))
          return ".rodata.hot";

        if (s->name.ends_with(".unlikely."))
          return ".rodata.unlikely";
      }
}

}
if (isSectionSuffix(".unlikely", s->name)) {
return ".rodata.unlikely";
}
}
}
return v;
}
}

for (StringRef v :
{".ldata", ".lrodata", ".lbss", ".gcc_except_table", ".init_array",
".fini_array", ".tbss", ".tdata", ".ARM.exidx", ".ARM.extab", ".ctors",
".dtors", ".sbss", ".sdata", ".srodata"})
if (isSectionPrefix(v, s->name))
return v;

Expand Down
23 changes: 22 additions & 1 deletion lld/ELF/Writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -555,6 +555,27 @@ template <class ELFT> void Writer<ELFT>::addSectionSymbols() {
}
}

// Returns true if the section is a data section that's read only and
// relocatable per its section name.
static bool isRelRoDataSection(Ctx &ctx, StringRef secName) {
// The section name should start with ".data.rel.ro".
if (!secName.consume_front(".data.rel.ro"))
return false;

// If the section name is .data.rel.ro, it is a relocatable read-only data
// section.
if (secName.empty())
return true;
// If -z keep-data-section-prefix is given, '.data.rel.ro.hot' and
// '.data.rel.ro.unlikely' are considered a split of '.data.rel.ro' based on
// hotness.
if (ctx.arg.zKeepDataSectionPrefix) {
return secName == ".hot" || secName == ".unlikely";
}

return false;
}

// Today's loaders have a feature to make segments read-only after
// processing dynamic relocations to enhance security. PT_GNU_RELRO
// is defined for that.
Expand Down Expand Up @@ -631,7 +652,7 @@ static bool isRelroSection(Ctx &ctx, const OutputSection *sec) {
// magic section names.
StringRef s = sec->name;

bool abiAgnostic = s == ".data.rel.ro" || s == ".bss.rel.ro" ||
bool abiAgnostic = isRelRoDataSection(ctx, s) || s == ".bss.rel.ro" ||
s == ".ctors" || s == ".dtors" || s == ".jcr" ||
s == ".eh_frame" || s == ".fini_array" ||
s == ".init_array" || s == ".preinit_array";
Expand Down
127 changes: 127 additions & 0 deletions lld/test/ELF/data-section-prefix.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
# REQUIRES: x86
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a patch organization perspective, it would be clearer to first introduce the option in a patch and then add RELRO semantics in a follow-up patch (#148920). While it's unusual that 148920 was merged first, the changes remain on the right track... I would rename this to keep-data-section-prefix.s, not introducing a separate test file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From a patch organization perspective, it would be clearer to first introduce the option in a patch and then add RELRO semantics in a follow-up patch (#148920). While it's unusual that 148920 was merged first, the changes remain on the right track...

I appreciate you bringing up the merge order and agree with it. I've opted to merge #148920 first because linker script is an alternative option to this patch, and #148920 is on the common path of either linker script or section grouping.

I'll also follow up on the reviews to get this one ready asap.

I would rename this to keep-data-section-prefix.s, not introducing a separate test file.

Done.

## -z keep-data-section-prefix separates static data sections with prefix
## .<section>.hot, .<section>.unlikely in the absence of a SECTIONS command.

# RUN: rm -rf %t && split-file %s %t && cd %t

# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o

# RUN: ld.lld a.o -o out1
# RUN: llvm-readelf -S out1 | FileCheck --check-prefix=BASIC %s
# RUN: ld.lld -z nokeep-text-section-prefix a.o -o out2
# RUN: cmp out1 out2

# RUN: ld.lld -z keep-data-section-prefix a.o -o out3
# RUN: llvm-readelf -S out3 | FileCheck --check-prefix=KEEP %s

## With a SECTIONS command, orphan sections are created verbatim.
## No grouping is performed for them.
# RUN: ld.lld -T x.lds -z keep-data-section-prefix a.o -o out4
# RUN: llvm-readelf -S out4 | FileCheck --check-prefix=SCRIPT %s

# BASIC: [Nr] Name Type Address Off Size
# BASIC: [ 1] .text
# BASIC-NEXT: [ 2] .data.rel.ro PROGBITS 00000000002021c9 0001c9 00000f
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The addresses are not useful. They could cause complexity when lld's address assignment changes. Suggest:

# BASIC-NEXT: [ 2] .data.rel.ro PROGBITS

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The addresses are not useful. They could cause complexity when lld's address assignment changes

Got it. I replaced Address and Off columns with {{.*}} and kept Size column.

# BASIC-NEXT: [ 3] .bss.rel.ro NOBITS 00000000002021d8 0001d8 000008
# BASIC-NEXT: [ 4] .relro_padding NOBITS 00000000002021e0 0001d8 000e20
# BASIC-NEXT: [ 5] .rodata PROGBITS 00000000002031e0 0001e0 000006
# BASIC-NEXT: [ 6] .data PROGBITS 00000000002031e6 0001e6 000004
# BASIC-NEXT: [ 7] .bss NOBITS 00000000002031ea 0001ea 000004

# KEEP: [Nr] Name Type Address Off Size
# KEEP: [ 1] .text
# KEEP-NEXT: [ 2] .data.rel.ro PROGBITS 00000000002021c9 0001c9 000009
# KEEP-NEXT: [ 3] .data.rel.ro.hot PROGBITS 00000000002021d2 0001d2 000004
# KEEP-NEXT: [ 4] .data.rel.ro.unlikely PROGBITS 00000000002021d6 0001d6 000002
# KEEP-NEXT: [ 5] .bss.rel.ro NOBITS 00000000002021d8 0001d8 000008
# KEEP-NEXT: [ 6] .relro_padding NOBITS 00000000002021e0 0001d8 000e20
# KEEP-NEXT: [ 7] .rodata PROGBITS 00000000002031e0 0001e0 000002
# KEEP-NEXT: [ 8] .rodata.hot PROGBITS 00000000002031e2 0001e2 000002
# KEEP-NEXT: [ 9] .rodata.unlikely PROGBITS 00000000002031e4 0001e4 000002
# KEEP-NEXT: [10] .data PROGBITS 00000000002031e6 0001e6 000002
# KEEP-NEXT: [11] .data.hot PROGBITS 00000000002031e8 0001e8 000001
# KEEP-NEXT: [12] .data.unlikely PROGBITS 00000000002031e9 0001e9 000001
# KEEP-NEXT: [13] .bss NOBITS 00000000002031ea 0001ea 000002
# KEEP-NEXT: [14] .bss.hot NOBITS 00000000002031ec 0001ea 000001
# KEEP-NEXT: [15] .bss.unlikely NOBITS 00000000002031ed 0001ea 000001

# SCRIPT: .text
# SCRIPT-NEXT: .rodata.i
# SCRIPT-NEXT: .rodata.hot.j
# SCRIPT-NEXT: .rodata.unlikely.k
# SCRIPT-NEXT: .rodata.split.l
# SCRIPT-NEXT: .rodata.cst32.hot.
# SCRIPT-NEXT: .rodata.str1.1.unlikely.
# SCRIPT-NEXT: .data.m
# SCRIPT-NEXT: .data.hot.n
# SCRIPT-NEXT: .data.unlikely.o
# SCRIPT-NEXT: .data.split.p
# SCRIPT-NEXT: .data.rel.ro.q
# SCRIPT-NEXT: .data.rel.ro.hot.r
# SCRIPT-NEXT: .data.rel.ro.unlikely.s
# SCRIPT-NEXT: .data.rel.ro.split.t
# SCRIPT-NEXT: .bss.a
# SCRIPT-NEXT: .bss.hot.b
# SCRIPT-NEXT: .bss.unlikely.c
# SCRIPT-NEXT: .bss.split.d
# SCRIPT-NEXT: .bss.rel.ro.e
# SCRIPT-NEXT: .bss.rel.ro.hot.f
# SCRIPT-NEXT: .bss.rel.ro.unlikely.g
# SCRIPT-NEXT: .bss.rel.ro.split.h

#--- a.s
.globl _start
_start:
ret

.section .bss.a,"aw"
.byte 0
.section .bss.hot.b,"aw"
.byte 0
.section .bss.unlikely.c,"aw"
.byte 0
.section .bss.split.d,"aw"
.byte 0

.section .bss.rel.ro.e, "aw"
.space 2
.section .bss.rel.ro.hot.f, "aw"
.space 2
.section .bss.rel.ro.unlikely.g, "aw"
.space 2
.section .bss.rel.ro.split.h, "aw"
.space 2

.section .rodata.i,"aw"
.byte 1
.section .rodata.hot.j,"aw"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can test .rodata.hot. (without j), for -fno-unique-section-names

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done.

.byte 2
.section .rodata.unlikely.k,"aw"
.byte 3
.section .rodata.split.l,"aw"
.byte 4
.section .rodata.cst32.hot.,"aw"
.byte 5
.section .rodata.str1.1.unlikely.,"aw"
.byte 6

.section .data.m,"aw"
.byte 5
.section .data.hot.n,"aw"
.byte 6
.section .data.unlikely.o,"aw"
.byte 7
.section .data.split.p,"aw"
.byte 8

.section .data.rel.ro.q,"aw"
.quad 0
.section .data.rel.ro.hot.r,"aw"
.long 255
.section .data.rel.ro.unlikely.s,"aw"
.word 1
.section .data.rel.ro.split.t,"aw"
.byte 0

#--- x.lds
SECTIONS {}