[AArch64] Enable GlobalMerge on externals #158592

davemgreen · 2025-09-15T09:45:25Z

GlobalMerge has been enabled for minsize for a while, this patch enables it more generally. In my testing it did not affect performance very much, especially with the linker relaxations we already perform, but should help reduce code size a little.

llvmbot · 2025-09-15T09:46:00Z

@llvm/pr-subscribers-backend-aarch64

Author: David Green (davemgreen)

Changes

GlobalMerge has been enabled for minsize for a while, this patch enables it more generally. In my testing it did not affect performance very much, especially with the linker relaxations we already perform, but should help reduce code size a little.

Full diff: https://github.com/llvm/llvm-project/pull/158592.diff

4 Files Affected:

(modified) llvm/lib/Target/AArch64/AArch64TargetMachine.cpp (-6)
(modified) llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll (+7-7)
(modified) llvm/test/CodeGen/AArch64/global-merge-external.ll (+17-10)
(modified) llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll (+4-5)

diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index dde1d88403bfe..5b80b08375f8c 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -694,12 +694,6 @@ bool AArch64PassConfig::addPreISel() {
     // is disabled as we emit the .subsections_via_symbols directive which
     // means that merging extern globals is not safe.
     bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
-
-    // FIXME: extern global merging is only enabled when we optimise for size
-    // because there are some regressions with it also enabled for performance.
-    if (!OnlyOptimizeForSize)
-      MergeExternalByDefault = false;
-
     addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize,
                                   MergeExternalByDefault));
   }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll
index f37c942ab950c..dd0152c3f4c08 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll
@@ -35,24 +35,24 @@ define dso_local void @testcase(ptr nocapture %arg){
 ;
 ; CHECK-O3-LABEL: testcase:
 ; CHECK-O3:       // %bb.0: // %entry
-; CHECK-O3-NEXT:    adrp x8, global_ptr
-; CHECK-O3-NEXT:    ldr x9, [x8, :lo12:global_ptr]
+; CHECK-O3-NEXT:    adrp x8, .L_MergedGlobals+8
+; CHECK-O3-NEXT:    ldr x9, [x8, :lo12:.L_MergedGlobals+8]
 ; CHECK-O3-NEXT:    cbz x9, .LBB0_2
 ; CHECK-O3-NEXT:  // %bb.1: // %if.then
 ; CHECK-O3-NEXT:    ldr x9, [x9]
 ; CHECK-O3-NEXT:    str x9, [x0]
-; CHECK-O3-NEXT:    ldr x8, [x8, :lo12:global_ptr]
-; CHECK-O3-NEXT:    adrp x9, global_int
+; CHECK-O3-NEXT:    ldr x8, [x8, :lo12:.L_MergedGlobals+8]
+; CHECK-O3-NEXT:    adrp x9, .L_MergedGlobals
 ; CHECK-O3-NEXT:    add x2, x8, #16
 ; CHECK-O3-NEXT:    mov w0, #10 // =0xa
-; CHECK-O3-NEXT:    ldr w1, [x9, :lo12:global_int]
+; CHECK-O3-NEXT:    ldr w1, [x9, :lo12:.L_MergedGlobals]
 ; CHECK-O3-NEXT:    b externalfunc
 ; CHECK-O3-NEXT:  .LBB0_2:
 ; CHECK-O3-NEXT:    mov x8, xzr
-; CHECK-O3-NEXT:    adrp x9, global_int
+; CHECK-O3-NEXT:    adrp x9, .L_MergedGlobals
 ; CHECK-O3-NEXT:    add x2, x8, #16
 ; CHECK-O3-NEXT:    mov w0, #10 // =0xa
-; CHECK-O3-NEXT:    ldr w1, [x9, :lo12:global_int]
+; CHECK-O3-NEXT:    ldr w1, [x9, :lo12:.L_MergedGlobals]
 ; CHECK-O3-NEXT:    b externalfunc
 ;
 ; CHECK-O2-6-LABEL: testcase:
diff --git a/llvm/test/CodeGen/AArch64/global-merge-external.ll b/llvm/test/CodeGen/AArch64/global-merge-external.ll
index fb3753c54e0ca..7f2b1e38ea4fc 100644
--- a/llvm/test/CodeGen/AArch64/global-merge-external.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-external.ll
@@ -9,14 +9,22 @@ target triple = "aarch64"
 @global1 = dso_local local_unnamed_addr global i32 0, align 4
 
 define dso_local i32 @func() {
-; CHECK-LABEL: func:
-; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    adrp x8, global0
-; CHECK-NEXT:    adrp x9, global1
-; CHECK-NEXT:    ldr w8, [x8, :lo12:global0]
-; CHECK-NEXT:    ldr w9, [x9, :lo12:global1]
-; CHECK-NEXT:    add w0, w9, w8
-; CHECK-NEXT:    ret
+; CHECK-O2-LABEL: func:
+; CHECK-O2:       // %bb.0: // %entry
+; CHECK-O2-NEXT:    adrp x8, global0
+; CHECK-O2-NEXT:    adrp x9, global1
+; CHECK-O2-NEXT:    ldr w8, [x8, :lo12:global0]
+; CHECK-O2-NEXT:    ldr w9, [x9, :lo12:global1]
+; CHECK-O2-NEXT:    add w0, w9, w8
+; CHECK-O2-NEXT:    ret
+;
+; CHECK-O3-LABEL: func:
+; CHECK-O3:       // %bb.0: // %entry
+; CHECK-O3-NEXT:    adrp x8, .L_MergedGlobals
+; CHECK-O3-NEXT:    add x8, x8, :lo12:.L_MergedGlobals
+; CHECK-O3-NEXT:    ldp w9, w8, [x8]
+; CHECK-O3-NEXT:    add w0, w8, w9
+; CHECK-O3-NEXT:    ret
 entry:
   %0 = load i32, ptr @global0, align 4
   %1 = load i32, ptr @global1, align 4
@@ -24,5 +32,4 @@ entry:
   ret i32 %add
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-O2: {{.*}}
-; CHECK-O3: {{.*}}
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
index caf6f1a83f762..1207eaa2612a3 100644
--- a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
+++ b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
@@ -26,8 +26,8 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
 ; CHECK-ASM-NEXT:    cbz x9, .LBB0_5
 ; CHECK-ASM-NEXT:  // %bb.2:
 ; CHECK-ASM-NEXT:    ldrsw x9, [sp, #8]
-; CHECK-ASM-NEXT:    adrp x10, B
-; CHECK-ASM-NEXT:    add x10, x10, :lo12:B
+; CHECK-ASM-NEXT:    adrp x10, .L_MergedGlobals
+; CHECK-ASM-NEXT:    add x10, x10, :lo12:.L_MergedGlobals
 ; CHECK-ASM-NEXT:    strb wzr, [x10, x8]
 ; CHECK-ASM-NEXT:    cmp x9, #10
 ; CHECK-ASM-NEXT:    b.hi .LBB0_6
@@ -36,9 +36,8 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
 ; CHECK-ASM-NEXT:    sub x8, x8, x9
 ; CHECK-ASM-NEXT:    cbz x8, .LBB0_6
 ; CHECK-ASM-NEXT:  // %bb.4:
-; CHECK-ASM-NEXT:    adrp x8, B2
-; CHECK-ASM-NEXT:    add x8, x8, :lo12:B2
-; CHECK-ASM-NEXT:    strb wzr, [x8, x9]
+; CHECK-ASM-NEXT:    add x8, x10, x9
+; CHECK-ASM-NEXT:    strb wzr, [x8, #10]
 ; CHECK-ASM-NEXT:    add sp, sp, #16
 ; CHECK-ASM-NEXT:    .cfi_def_cfa_offset 0
 ; CHECK-ASM-NEXT:    ret

sjoerdmeijer · 2025-09-15T09:56:57Z

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

    bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
-
-    // FIXME: extern global merging is only enabled when we optimise for size
-    // because there are some regressions with it also enabled for performance.


I was going to ask about these "some regressions", but did a git blame first and noticed I contributed this, so yes, that's funny and I can't remember what they were. Does this ring a bell to you, @davemgreen ? If I had to take a guess, then I did some measurements on smaller A-cores, i.e. not the wide X or Neoverse cores we have today. You did mention that it didn't affect perf very much. I am fine with the change, but my only question out of curiousity is which cores you've tested.

https://reviews.llvm.org/D61947 mentions one small regression in spec2000, but it was apparently ran with -Oz.

I haven't seen any worrying performance results myself, either on server class cpus or models.

sjoerdmeijer

Cheers, LGTM.

davemgreen requested review from SamTebbs33, nasherm and sjoerdmeijer September 15, 2025 09:45

llvmbot added the backend:AArch64 label Sep 15, 2025

davemgreen requested a review from c-rhodes September 15, 2025 09:45

sjoerdmeijer reviewed Sep 15, 2025

View reviewed changes

sjoerdmeijer approved these changes Sep 16, 2025

View reviewed changes

davemgreen merged commit a50c11a into llvm:main Sep 17, 2025
11 of 12 checks passed

davemgreen deleted the gh-a64-globalmerge branch September 17, 2025 16:49

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[AArch64] Enable GlobalMerge on externals #158592

[AArch64] Enable GlobalMerge on externals #158592

davemgreen commented Sep 15, 2025

Uh oh!

llvmbot commented Sep 15, 2025

Uh oh!

sjoerdmeijer Sep 15, 2025

Uh oh!

davemgreen Sep 15, 2025

Uh oh!

sjoerdmeijer left a comment

Uh oh!

Uh oh!

Uh oh!

[AArch64] Enable GlobalMerge on externals #158592

[AArch64] Enable GlobalMerge on externals #158592

Conversation

davemgreen commented Sep 15, 2025

Uh oh!

llvmbot commented Sep 15, 2025

Uh oh!

sjoerdmeijer Sep 15, 2025

Choose a reason for hiding this comment

Uh oh!

davemgreen Sep 15, 2025

Choose a reason for hiding this comment

Uh oh!

sjoerdmeijer left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!