Skip to content

Commit 9d15fea

Browse files
committed
[AArch64] Enable GlobalMerge on externals
GlobalMerge has been enabled for minsize for a while; this patch enables it more generally. In my testing it did not affect performance very much, especially with the linker relaxations we already perform, but it should help reduce code size a little.
1 parent 273917e commit 9d15fea

File tree

4 files changed

+28
-28
lines changed

4 files changed

+28
-28
lines changed

llvm/lib/Target/AArch64/AArch64TargetMachine.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -694,12 +694,6 @@ bool AArch64PassConfig::addPreISel() {
694694
// is disabled as we emit the .subsections_via_symbols directive which
695695
// means that merging extern globals is not safe.
696696
bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();
697-
698-
// FIXME: extern global merging is only enabled when we optimise for size
699-
// because there are some regressions with it also enabled for performance.
700-
if (!OnlyOptimizeForSize)
701-
MergeExternalByDefault = false;
702-
703697
addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize,
704698
MergeExternalByDefault));
705699
}

llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,24 +35,24 @@ define dso_local void @testcase(ptr nocapture %arg){
3535
;
3636
; CHECK-O3-LABEL: testcase:
3737
; CHECK-O3: // %bb.0: // %entry
38-
; CHECK-O3-NEXT: adrp x8, global_ptr
39-
; CHECK-O3-NEXT: ldr x9, [x8, :lo12:global_ptr]
38+
; CHECK-O3-NEXT: adrp x8, .L_MergedGlobals+8
39+
; CHECK-O3-NEXT: ldr x9, [x8, :lo12:.L_MergedGlobals+8]
4040
; CHECK-O3-NEXT: cbz x9, .LBB0_2
4141
; CHECK-O3-NEXT: // %bb.1: // %if.then
4242
; CHECK-O3-NEXT: ldr x9, [x9]
4343
; CHECK-O3-NEXT: str x9, [x0]
44-
; CHECK-O3-NEXT: ldr x8, [x8, :lo12:global_ptr]
45-
; CHECK-O3-NEXT: adrp x9, global_int
44+
; CHECK-O3-NEXT: ldr x8, [x8, :lo12:.L_MergedGlobals+8]
45+
; CHECK-O3-NEXT: adrp x9, .L_MergedGlobals
4646
; CHECK-O3-NEXT: add x2, x8, #16
4747
; CHECK-O3-NEXT: mov w0, #10 // =0xa
48-
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int]
48+
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:.L_MergedGlobals]
4949
; CHECK-O3-NEXT: b externalfunc
5050
; CHECK-O3-NEXT: .LBB0_2:
5151
; CHECK-O3-NEXT: mov x8, xzr
52-
; CHECK-O3-NEXT: adrp x9, global_int
52+
; CHECK-O3-NEXT: adrp x9, .L_MergedGlobals
5353
; CHECK-O3-NEXT: add x2, x8, #16
5454
; CHECK-O3-NEXT: mov w0, #10 // =0xa
55-
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int]
55+
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:.L_MergedGlobals]
5656
; CHECK-O3-NEXT: b externalfunc
5757
;
5858
; CHECK-O2-6-LABEL: testcase:

llvm/test/CodeGen/AArch64/global-merge-external.ll

Lines changed: 17 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,27 @@ target triple = "aarch64"
99
@global1 = dso_local local_unnamed_addr global i32 0, align 4
1010

1111
define dso_local i32 @func() {
12-
; CHECK-LABEL: func:
13-
; CHECK: // %bb.0: // %entry
14-
; CHECK-NEXT: adrp x8, global0
15-
; CHECK-NEXT: adrp x9, global1
16-
; CHECK-NEXT: ldr w8, [x8, :lo12:global0]
17-
; CHECK-NEXT: ldr w9, [x9, :lo12:global1]
18-
; CHECK-NEXT: add w0, w9, w8
19-
; CHECK-NEXT: ret
12+
; CHECK-O2-LABEL: func:
13+
; CHECK-O2: // %bb.0: // %entry
14+
; CHECK-O2-NEXT: adrp x8, global0
15+
; CHECK-O2-NEXT: adrp x9, global1
16+
; CHECK-O2-NEXT: ldr w8, [x8, :lo12:global0]
17+
; CHECK-O2-NEXT: ldr w9, [x9, :lo12:global1]
18+
; CHECK-O2-NEXT: add w0, w9, w8
19+
; CHECK-O2-NEXT: ret
20+
;
21+
; CHECK-O3-LABEL: func:
22+
; CHECK-O3: // %bb.0: // %entry
23+
; CHECK-O3-NEXT: adrp x8, .L_MergedGlobals
24+
; CHECK-O3-NEXT: add x8, x8, :lo12:.L_MergedGlobals
25+
; CHECK-O3-NEXT: ldp w9, w8, [x8]
26+
; CHECK-O3-NEXT: add w0, w8, w9
27+
; CHECK-O3-NEXT: ret
2028
entry:
2129
%0 = load i32, ptr @global0, align 4
2230
%1 = load i32, ptr @global1, align 4
2331
%add = add nsw i32 %1, %0
2432
ret i32 %add
2533
}
2634
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
27-
; CHECK-O2: {{.*}}
28-
; CHECK-O3: {{.*}}
35+
; CHECK: {{.*}}

llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
2626
; CHECK-ASM-NEXT: cbz x9, .LBB0_5
2727
; CHECK-ASM-NEXT: // %bb.2:
2828
; CHECK-ASM-NEXT: ldrsw x9, [sp, #8]
29-
; CHECK-ASM-NEXT: adrp x10, B
30-
; CHECK-ASM-NEXT: add x10, x10, :lo12:B
29+
; CHECK-ASM-NEXT: adrp x10, .L_MergedGlobals
30+
; CHECK-ASM-NEXT: add x10, x10, :lo12:.L_MergedGlobals
3131
; CHECK-ASM-NEXT: strb wzr, [x10, x8]
3232
; CHECK-ASM-NEXT: cmp x9, #10
3333
; CHECK-ASM-NEXT: b.hi .LBB0_6
@@ -36,9 +36,8 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
3636
; CHECK-ASM-NEXT: sub x8, x8, x9
3737
; CHECK-ASM-NEXT: cbz x8, .LBB0_6
3838
; CHECK-ASM-NEXT: // %bb.4:
39-
; CHECK-ASM-NEXT: adrp x8, B2
40-
; CHECK-ASM-NEXT: add x8, x8, :lo12:B2
41-
; CHECK-ASM-NEXT: strb wzr, [x8, x9]
39+
; CHECK-ASM-NEXT: add x8, x10, x9
40+
; CHECK-ASM-NEXT: strb wzr, [x8, #10]
4241
; CHECK-ASM-NEXT: add sp, sp, #16
4342
; CHECK-ASM-NEXT: .cfi_def_cfa_offset 0
4443
; CHECK-ASM-NEXT: ret

0 commit comments

Comments
 (0)