From 9d15fead40566708dabd39dccdc386daa7f5ea9d Mon Sep 17 00:00:00 2001 From: David Green Date: Mon, 15 Sep 2025 10:41:07 +0100 Subject: [PATCH] [AArch64] Enable GlobalMerge on externals GlobalMerge has been enabled for minsize for a while, this patch enables it more generally. In my testing it did not affect performance very much, especially with the linker relaxations we already perform, but should help reduce code size a little. --- .../Target/AArch64/AArch64TargetMachine.cpp | 6 ----- .../CodeGen/AArch64/aarch64-tail-dup-size.ll | 14 +++++----- .../CodeGen/AArch64/global-merge-external.ll | 27 ++++++++++++------- .../AArch64/local-bounds-single-trap.ll | 9 +++---- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index dde1d88403bfe..5b80b08375f8c 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -694,12 +694,6 @@ bool AArch64PassConfig::addPreISel() { // is disabled as we emit the .subsections_via_symbols directive which // means that merging extern globals is not safe. bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO(); - - // FIXME: extern global merging is only enabled when we optimise for size - // because there are some regressions with it also enabled for performance. - if (!OnlyOptimizeForSize) - MergeExternalByDefault = false; - addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize, MergeExternalByDefault)); } diff --git a/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll index f37c942ab950c..dd0152c3f4c08 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll @@ -35,24 +35,24 @@ define dso_local void @testcase(ptr nocapture %arg){ ; ; CHECK-O3-LABEL: testcase: ; CHECK-O3: // %bb.0: // %entry -; CHECK-O3-NEXT: adrp x8, global_ptr -; CHECK-O3-NEXT: ldr x9, [x8, :lo12:global_ptr] +; CHECK-O3-NEXT: adrp x8, .L_MergedGlobals+8 +; CHECK-O3-NEXT: ldr x9, [x8, :lo12:.L_MergedGlobals+8] ; CHECK-O3-NEXT: cbz x9, .LBB0_2 ; CHECK-O3-NEXT: // %bb.1: // %if.then ; CHECK-O3-NEXT: ldr x9, [x9] ; CHECK-O3-NEXT: str x9, [x0] -; CHECK-O3-NEXT: ldr x8, [x8, :lo12:global_ptr] -; CHECK-O3-NEXT: adrp x9, global_int +; CHECK-O3-NEXT: ldr x8, [x8, :lo12:.L_MergedGlobals+8] +; CHECK-O3-NEXT: adrp x9, .L_MergedGlobals ; CHECK-O3-NEXT: add x2, x8, #16 ; CHECK-O3-NEXT: mov w0, #10 // =0xa -; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int] +; CHECK-O3-NEXT: ldr w1, [x9, :lo12:.L_MergedGlobals] ; CHECK-O3-NEXT: b externalfunc ; CHECK-O3-NEXT: .LBB0_2: ; CHECK-O3-NEXT: mov x8, xzr -; CHECK-O3-NEXT: adrp x9, global_int +; CHECK-O3-NEXT: adrp x9, .L_MergedGlobals ; CHECK-O3-NEXT: add x2, x8, #16 ; CHECK-O3-NEXT: mov w0, #10 // =0xa -; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int] +; CHECK-O3-NEXT: ldr w1, [x9, :lo12:.L_MergedGlobals] ; CHECK-O3-NEXT: b externalfunc ; ; CHECK-O2-6-LABEL: testcase: diff --git a/llvm/test/CodeGen/AArch64/global-merge-external.ll b/llvm/test/CodeGen/AArch64/global-merge-external.ll index fb3753c54e0ca..7f2b1e38ea4fc 100644 --- a/llvm/test/CodeGen/AArch64/global-merge-external.ll +++ b/llvm/test/CodeGen/AArch64/global-merge-external.ll @@ -9,14 +9,22 @@ target triple = "aarch64" @global1 = dso_local local_unnamed_addr global i32 0, align 4 define dso_local i32 @func() { -; CHECK-LABEL: func: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, global0 -; CHECK-NEXT: adrp x9, global1 -; CHECK-NEXT: ldr w8, [x8, :lo12:global0] -; CHECK-NEXT: ldr w9, [x9, :lo12:global1] -; CHECK-NEXT: add w0, w9, w8 -; CHECK-NEXT: ret +; CHECK-O2-LABEL: func: +; CHECK-O2: // %bb.0: // %entry +; CHECK-O2-NEXT: adrp x8, global0 +; CHECK-O2-NEXT: adrp x9, global1 +; CHECK-O2-NEXT: ldr w8, [x8, :lo12:global0] +; CHECK-O2-NEXT: ldr w9, [x9, :lo12:global1] +; CHECK-O2-NEXT: add w0, w9, w8 +; CHECK-O2-NEXT: ret +; +; CHECK-O3-LABEL: func: +; CHECK-O3: // %bb.0: // %entry +; CHECK-O3-NEXT: adrp x8, .L_MergedGlobals +; CHECK-O3-NEXT: add x8, x8, :lo12:.L_MergedGlobals +; CHECK-O3-NEXT: ldp w9, w8, [x8] +; CHECK-O3-NEXT: add w0, w8, w9 +; CHECK-O3-NEXT: ret entry: %0 = load i32, ptr @global0, align 4 %1 = load i32, ptr @global1, align 4 @@ -24,5 +32,4 @@ entry: ret i32 %add } ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; CHECK-O2: {{.*}} -; CHECK-O3: {{.*}} +; CHECK: {{.*}} diff --git a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll index caf6f1a83f762..1207eaa2612a3 100644 --- a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll +++ b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll @@ -26,8 +26,8 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 { ; CHECK-ASM-NEXT: cbz x9, .LBB0_5 ; CHECK-ASM-NEXT: // %bb.2: ; CHECK-ASM-NEXT: ldrsw x9, [sp, #8] -; CHECK-ASM-NEXT: adrp x10, B -; CHECK-ASM-NEXT: add x10, x10, :lo12:B +; CHECK-ASM-NEXT: adrp x10, .L_MergedGlobals +; CHECK-ASM-NEXT: add x10, x10, :lo12:.L_MergedGlobals ; CHECK-ASM-NEXT: strb wzr, [x10, x8] ; CHECK-ASM-NEXT: cmp x9, #10 ; CHECK-ASM-NEXT: b.hi .LBB0_6 @@ -36,9 +36,8 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 { ; CHECK-ASM-NEXT: sub x8, x8, x9 ; CHECK-ASM-NEXT: cbz x8, .LBB0_6 ; CHECK-ASM-NEXT: // %bb.4: -; CHECK-ASM-NEXT: adrp x8, B2 -; CHECK-ASM-NEXT: add x8, x8, :lo12:B2 -; CHECK-ASM-NEXT: strb wzr, [x8, x9] +; CHECK-ASM-NEXT: add x8, x10, x9 +; CHECK-ASM-NEXT: strb wzr, [x8, #10] ; CHECK-ASM-NEXT: add sp, sp, #16 ; CHECK-ASM-NEXT: .cfi_def_cfa_offset 0 ; CHECK-ASM-NEXT: ret