llvm · mingmingl-llvm · Mar 30, 2025 · Jul 21, 2025 · Jul 21, 2025 · Jul 22, 2025
diff --git a/lld/ELF/LinkerScript.cpp b/lld/ELF/LinkerScript.cpp
@@ -103,6 +103,54 @@ StringRef LinkerScript::getOutputSectionName(const InputSectionBase *s) const {
     return ".text";
   }
 
+  // Data section name prefixes whose .hot and .unlikely variants are kept
+  // when zKeepDataSectionPrefix is true.
+  static constexpr StringRef dataSectionPrefixes[] = {
+      ".data.rel.ro", ".data", ".rodata", ".bss.rel.ro", ".bss",
+  };
+
+  // If keep-data-section-prefix is enabled, map hot-prefixed data sections
+  // to a .hot variant in the output and map unlikely-prefixed data sections
+  // to a .unlikely variant. The mapping for hot input sections is illustrated
+  // below; the same applies to unlikely ones.
+  // [bar] is a placeholder for an optional global variable name.
+  // - .data.rel.ro.hot.[bar] => .data.rel.ro.hot
+  // - .data.hot.[bar] => .data.hot
+  // - {.rodata.hot.[bar], .rodata.str*.hot., .rodata.cst*.hot.} => .rodata.hot
+  // - .bss.rel.ro => .bss.rel.ro
+  // - .bss.hot.[bar] => .bss.hot
+  // Note no hot / unlikely mapping is listed for .bss.rel.ro. It's placed
+  // before .bss so it gets processed before the `.bss` prefix is seen, just
+  // like how `.data.rel.ro` should be processed before the `.data` prefix.
+  if (ctx.arg.zKeepDataSectionPrefix)
+    for (auto [index, v] : llvm::enumerate(dataSectionPrefixes)) {
+      StringRef secName = s->name;
+      // If secName starts with v, strip v from it. Otherwise just continue
+      // and try the next prefix.
+      if (!secName.consume_front(v))
+        continue;
+
+      // The object file writer emits the trailing dot in `.hot.` and
+      // `.unlikely.` to disambiguate between `.<section>.<variable-name>`
+      // (without trailing dot) and `.<section>.hot.[optional-variable-name]`.
+      // We require the same trailing dot here so that a C variable named
+      // `unlikely` is not mapped to an unlikely variant.
+      if (secName.starts_with(".hot."))
+        return s->name.substr(0, v.size() + 4); // 4 == strlen(".hot")
+      if (secName.starts_with(".unlikely."))
+        return s->name.substr(0, v.size() + 9); // 9 == strlen(".unlikely")
+      if (index == 2) { // dataSectionPrefixes[2] is ".rodata".
+        // Place input .rodata.str<N>.hot. or .rodata.cst<N>.hot. into the
+        // .rodata.hot section.
+        if (s->name.ends_with(".hot."))
+          return ".rodata.hot";
+        // Place input .rodata.str<N>.unlikely. or .rodata.cst<N>.unlikely.
+        // into the .rodata.unlikely section.
+        if (s->name.ends_with(".unlikely."))
+          return ".rodata.unlikely";
+      }
+    }
+
   for (StringRef v : {".data.rel.ro", ".data",       ".rodata",
                       ".bss.rel.ro",  ".bss",        ".ldata",
                       ".lrodata",     ".lbss",       ".gcc_except_table",

diff --git a/lld/test/ELF/keep-data-section-prefix.s b/lld/test/ELF/keep-data-section-prefix.s
@@ -1,32 +1,49 @@
 # REQUIRES: x86
+## -z keep-data-section-prefix separates static data sections with prefix
+## .<section>.hot, .<section>.unlikely in the absence of a SECTIONS command.
 
 # RUN: rm -rf %t && split-file %s %t && cd %t
 
+## Test that lld knows .data.rel.ro.unlikely and .data.rel.ro.hot are relocatable
+## read-only data sections.
 # RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o
 
-# RUN: ld.lld -z keep-data-section-prefix -T x.lds a.o -o out1
+# RUN: ld.lld -z keep-data-section-prefix -T x1.lds a.o -o out1
 # RUN: llvm-readelf -l out1 | FileCheck --check-prefixes=SEG,LS %s
 # RUN: llvm-readelf -S out1 | FileCheck %s --check-prefix=CHECK-LS
 
-# RUN: ld.lld -z keep-data-section-prefix a.o -o out2
-# RUN: llvm-readelf -l out2 | FileCheck --check-prefixes=SEG,PRE %s
-# RUN: llvm-readelf -S out2 | FileCheck %s --check-prefix=CHECK-PRE
-
 # RUN: ld.lld a.o -o out3
 # RUN: llvm-readelf -l out3 | FileCheck --check-prefixes=SEG,PRE %s
 # RUN: llvm-readelf -S out3 | FileCheck %s --check-prefix=CHECK-PRE
 
-# RUN: not ld.lld -T x.lds a.o 2>&1 | FileCheck %s
+# RUN: not ld.lld -T x1.lds a.o 2>&1 | FileCheck %s
 # CHECK: error: section: .relro_padding is not contiguous with other relro sections
 
+## Test that lld can group data sections based on their hotness prefix.
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o
+## The default output and the -z nokeep-data-section-prefix output are identical.
+# RUN: ld.lld b.o -o out1
+# RUN: llvm-readelf -S out1 | FileCheck --check-prefix=BASIC %s
+# RUN: ld.lld -z nokeep-data-section-prefix b.o -o out2
+# RUN: cmp out1 out2
+
+## With a SECTIONS command, orphan sections are created verbatim.
+## No grouping is performed for them.
+# RUN: ld.lld -T x2.lds -z keep-data-section-prefix b.o -o out4
+# RUN: llvm-readelf -S out4 | FileCheck --check-prefix=SCRIPT %s
+
+# RUN: ld.lld -z keep-data-section-prefix b.o -o out3
+# RUN: llvm-readelf -S out3 | FileCheck --check-prefix=KEEP %s
+
 ## The first RW PT_LOAD segment has FileSiz 0x126f (0x1000 + 0x200 + 0x60 + 0xf),
 ## and its p_offset p_vaddr p_paddr p_filesz should match PT_GNU_RELRO.
-#           Type           Offset   VirtAddr           PhysAddr           FileSiz  MemSiz   Flg Align
-# SEG:      LOAD           0x0001c8 0x00000000002011c8 0x00000000002011c8 0x000001 0x000001 R E 0x1000
-# SEG-NEXT: LOAD           0x0001c9 0x00000000002021c9 0x00000000002021c9 0x00126f 0x001e37 RW  0x1000
-# SEG-NEXT: LOAD           0x001438 0x0000000000204438 0x0000000000204438 0x000001 0x000002 RW  0x1000
-# SEG-NEXT: GNU_RELRO      0x0001c9 0x00000000002021c9 0x00000000002021c9 0x00126f 0x001e37 R   0x1
-# SEG-NEXT: GNU_STACK      0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0x0
+#           Type           {{.*}}           FileSiz  MemSiz   Flg
+# SEG:      LOAD           {{.*}}           0x000001 0x000001 R E
+# SEG-NEXT: LOAD           {{.*}}           0x00126f 0x001e37 RW
+# SEG-NEXT: LOAD           {{.*}}           0x000001 0x000002 RW
+# SEG-NEXT: GNU_RELRO      {{.*}}           0x00126f 0x001e37 R
+# SEG-NEXT: GNU_STACK      {{.*}}           0x000000 0x000000 RW
 
 ## Input to output mapping per linker script
 ##   .data.rel.ro.split -> .data.rel.ro
@@ -37,34 +54,80 @@
 # LS-NEXT: .data.rel.ro.hot .data.rel.ro .data.rel.ro.unlikely .relro_padding
 # LS-NEXT: .data .bss
 
-#        [Nr] Name                    Type            Address          Off    Size
-# CHECK-LS:      .data.rel.ro.hot        PROGBITS        00000000002021c9 0001c9 00000f
-# CHECK-LS-NEXT: .data.rel.ro            PROGBITS        00000000002021d8 0001d8 000260
-# CHECK-LS-NEXT: .data.rel.ro.unlikely   PROGBITS        0000000000202438 000438 001000
-# CHECK-LS-NEXT: .relro_padding          NOBITS          0000000000203438 001438 000bc8
-# CHECK-LS-NEXT: .data                   PROGBITS        0000000000204438 001438 000001
-# CHECK-LS-NEXT: .bss                    NOBITS          0000000000204439 001439 000001
+#        [Nr] Name                    Type           {{.*}}    Size
+# CHECK-LS:      .data.rel.ro.hot        PROGBITS    {{.*}} 00000f
+# CHECK-LS-NEXT: .data.rel.ro            PROGBITS    {{.*}} 000260
+# CHECK-LS-NEXT: .data.rel.ro.unlikely   PROGBITS    {{.*}} 001000
+# CHECK-LS-NEXT: .relro_padding          NOBITS      {{.*}} 000bc8
+# CHECK-LS-NEXT: .data                   PROGBITS    {{.*}} 000001
+# CHECK-LS-NEXT: .bss                    NOBITS      {{.*}} 000001
 
 ## Linker script is not provided to map data sections.
 ## So all input sections with prefix .data.rel.ro will map to .data.rel.ro in the output.
 # PRE:      .text
 # PRE-NEXT: .data.rel.ro .relro_padding
 # PRE-NEXT: .data .bss
 
-#        [Nr] Name                    Type            Address          Off    Size
-# CHECK-PRE:      .data.rel.ro            PROGBITS        00000000002021c9 0001c9 00126f
-# CHECK-PRE-NEXT: .relro_padding          NOBITS          0000000000203438 001438 000bc8
-# CHECK-PRE-NEXT: .data                   PROGBITS        0000000000204438 001438 000001
-# CHECK-PRE-NEXT: .bss                    NOBITS          0000000000204439 001439 000001
-
-#--- x.lds
+#        [Nr] Name                    Type            {{.*}}    Size
+# CHECK-PRE:      .data.rel.ro            PROGBITS    {{.*}}    00126f
+# CHECK-PRE-NEXT: .relro_padding          NOBITS      {{.*}}    000bc8
+# CHECK-PRE-NEXT: .data                   PROGBITS    {{.*}}    000001
+# CHECK-PRE-NEXT: .bss                    NOBITS      {{.*}}    000001
+
+# BASIC:         [Nr] Name              Type            {{.*}}   Size              
+# BASIC:         [ 1] .text       
+# BASIC-NEXT:    [ 2] .data.rel.ro      PROGBITS        {{.*}}   00000f
+# BASIC-NEXT:    [ 3] .bss.rel.ro       NOBITS          {{.*}}   000002
+# BASIC-NEXT:    [ 4] .relro_padding    NOBITS          {{.*}}   000e26
+# BASIC-NEXT:    [ 5] .rodata           PROGBITS        {{.*}}   000006
+# BASIC-NEXT:    [ 6] .data             PROGBITS        {{.*}}   000004
+# BASIC-NEXT:    [ 7] .bss              NOBITS          {{.*}}  000004
+
+# KEEP:       [Nr]  Name                    Type            {{.*}}    Size
+# KEEP:       [ 1] .text
+# KEEP-NEXT:  [ 2] .data.rel.ro             PROGBITS        {{.*}} 000009
+# KEEP-NEXT:  [ 3] .data.rel.ro.hot         PROGBITS        {{.*}} 000004
+# KEEP-NEXT:  [ 4] .data.rel.ro.unlikely    PROGBITS        {{.*}} 000002
+# KEEP-NEXT:  [ 5] .bss.rel.ro              NOBITS          {{.*}} 000002
+# KEEP-NEXT:  [ 6] .relro_padding           NOBITS          {{.*}} 000e26
+# KEEP-NEXT:  [ 7] .rodata                  PROGBITS        {{.*}} 000002
+# KEEP-NEXT:  [ 8] .rodata.hot              PROGBITS        {{.*}} 000002
+# KEEP-NEXT:  [ 9] .rodata.unlikely         PROGBITS        {{.*}} 000002
+# KEEP-NEXT:  [10] .data                    PROGBITS        {{.*}} 000002
+# KEEP-NEXT:  [11] .data.hot                PROGBITS        {{.*}} 000001
+# KEEP-NEXT:  [12] .data.unlikely           PROGBITS        {{.*}} 000001
+# KEEP-NEXT:  [13] .bss                     NOBITS          {{.*}} 000002
+# KEEP-NEXT:  [14] .bss.hot                 NOBITS          {{.*}} 000001
+# KEEP-NEXT:  [15] .bss.unlikely            NOBITS          {{.*}} 000001
+
+# SCRIPT:      .text
+# SCRIPT-NEXT: .bss.rel.ro
+# SCRIPT-NEXT: .rodata.i
+# SCRIPT-NEXT: .rodata.hot.
+# SCRIPT-NEXT: .rodata.unlikely.k
+# SCRIPT-NEXT: .rodata.split.l
+# SCRIPT-NEXT: .rodata.cst32.hot.
+# SCRIPT-NEXT: .rodata.str1.1.unlikely.
+# SCRIPT-NEXT: .data.m
+# SCRIPT-NEXT: .data.hot.n
+# SCRIPT-NEXT: .data.unlikely.o
+# SCRIPT-NEXT: .data.split.p
+# SCRIPT-NEXT: .data.rel.ro.q
+# SCRIPT-NEXT: .data.rel.ro.hot.r
+# SCRIPT-NEXT: .data.rel.ro.unlikely.s
+# SCRIPT-NEXT: .data.rel.ro.split.t
+# SCRIPT-NEXT: .bss.a
+# SCRIPT-NEXT: .bss.hot.b
+# SCRIPT-NEXT: .bss.unlikely.c
+# SCRIPT-NEXT: .bss.split.d
+
+#--- x1.lds
 SECTIONS {
   .data.rel.ro.hot : { *(.data.rel.ro.hot) }
   .data.rel.ro : { .data.rel.ro }
   .data.rel.ro.unlikely : { *(.data.rel.ro.unlikely) }
 } INSERT AFTER .text
 
-
 #--- a.s
 .globl _start
 _start:
@@ -87,3 +150,54 @@ _start:
 
 .section .bss, "aw"
 .space 1
+
+#--- b.s
+.globl _start
+_start:
+  ret
+
+.section .bss.a,"aw"
+  .byte 0
+.section .bss.hot.b,"aw"
+  .byte 0
+.section .bss.unlikely.c.,"aw"
+  .byte 0
+.section .bss.split.d,"aw"
+  .byte 0
+
+.section .bss.rel.ro, "aw"
+  .space 2
+
+.section .rodata.i,"aw"
+  .byte 1
+.section .rodata.hot.,"aw"
+  .byte 2
+.section .rodata.unlikely.k,"aw"
+  .byte 3
+.section .rodata.split.l,"aw"
+  .byte 4
+.section .rodata.cst32.hot.,"aw"
+  .byte 5
+.section .rodata.str1.1.unlikely.,"aw"
+  .byte 6
+
+.section .data.m,"aw"
+  .byte 5
+.section .data.hot.n,"aw"
+  .byte 6
+.section .data.unlikely.o,"aw"
+  .byte 7
+.section .data.split.p,"aw"
+  .byte 8
+
+.section .data.rel.ro.q,"aw"
+  .quad 0 
+.section .data.rel.ro.hot.r,"aw"
+  .long 255
+.section .data.rel.ro.unlikely.s,"aw"
+  .word 1
+.section .data.rel.ro.split.t,"aw"
+  .byte 0
+
+#--- x2.lds
+SECTIONS {}