Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,12 +694,6 @@ bool AArch64PassConfig::addPreISel() {
// is disabled as we emit the .subsections_via_symbols directive which
// means that merging extern globals is not safe.
bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();

// FIXME: extern global merging is only enabled when we optimise for size
// because there are some regressions with it also enabled for performance.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I was going to ask about these "some regressions", but did a git blame first and noticed I contributed this, so yes, that's funny and I can't remember what they were. Does this ring a bell to you, @davemgreen ? If I had to take a guess, then I did some measurements on smaller A-cores, i.e. not the wide X or Neoverse cores we have today. You did mention that it didn't affect perf very much. I am fine with the change, but my only question out of curiousity is which cores you've tested.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://reviews.llvm.org/D61947 mentions one small regression in spec2000, but it was apparently ran with -Oz.

I haven't seen any worrying performance results myself, either on server class cpus or models.

if (!OnlyOptimizeForSize)
MergeExternalByDefault = false;

addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize,
MergeExternalByDefault));
}
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AArch64/aarch64-tail-dup-size.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,24 +35,24 @@ define dso_local void @testcase(ptr nocapture %arg){
;
; CHECK-O3-LABEL: testcase:
; CHECK-O3: // %bb.0: // %entry
; CHECK-O3-NEXT: adrp x8, global_ptr
; CHECK-O3-NEXT: ldr x9, [x8, :lo12:global_ptr]
; CHECK-O3-NEXT: adrp x8, .L_MergedGlobals+8
; CHECK-O3-NEXT: ldr x9, [x8, :lo12:.L_MergedGlobals+8]
; CHECK-O3-NEXT: cbz x9, .LBB0_2
; CHECK-O3-NEXT: // %bb.1: // %if.then
; CHECK-O3-NEXT: ldr x9, [x9]
; CHECK-O3-NEXT: str x9, [x0]
; CHECK-O3-NEXT: ldr x8, [x8, :lo12:global_ptr]
; CHECK-O3-NEXT: adrp x9, global_int
; CHECK-O3-NEXT: ldr x8, [x8, :lo12:.L_MergedGlobals+8]
; CHECK-O3-NEXT: adrp x9, .L_MergedGlobals
; CHECK-O3-NEXT: add x2, x8, #16
; CHECK-O3-NEXT: mov w0, #10 // =0xa
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int]
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:.L_MergedGlobals]
; CHECK-O3-NEXT: b externalfunc
; CHECK-O3-NEXT: .LBB0_2:
; CHECK-O3-NEXT: mov x8, xzr
; CHECK-O3-NEXT: adrp x9, global_int
; CHECK-O3-NEXT: adrp x9, .L_MergedGlobals
; CHECK-O3-NEXT: add x2, x8, #16
; CHECK-O3-NEXT: mov w0, #10 // =0xa
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:global_int]
; CHECK-O3-NEXT: ldr w1, [x9, :lo12:.L_MergedGlobals]
; CHECK-O3-NEXT: b externalfunc
;
; CHECK-O2-6-LABEL: testcase:
Expand Down
27 changes: 17 additions & 10 deletions llvm/test/CodeGen/AArch64/global-merge-external.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,27 @@ target triple = "aarch64"
@global1 = dso_local local_unnamed_addr global i32 0, align 4

define dso_local i32 @func() {
; CHECK-LABEL: func:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, global0
; CHECK-NEXT: adrp x9, global1
; CHECK-NEXT: ldr w8, [x8, :lo12:global0]
; CHECK-NEXT: ldr w9, [x9, :lo12:global1]
; CHECK-NEXT: add w0, w9, w8
; CHECK-NEXT: ret
; CHECK-O2-LABEL: func:
; CHECK-O2: // %bb.0: // %entry
; CHECK-O2-NEXT: adrp x8, global0
; CHECK-O2-NEXT: adrp x9, global1
; CHECK-O2-NEXT: ldr w8, [x8, :lo12:global0]
; CHECK-O2-NEXT: ldr w9, [x9, :lo12:global1]
; CHECK-O2-NEXT: add w0, w9, w8
; CHECK-O2-NEXT: ret
;
; CHECK-O3-LABEL: func:
; CHECK-O3: // %bb.0: // %entry
; CHECK-O3-NEXT: adrp x8, .L_MergedGlobals
; CHECK-O3-NEXT: add x8, x8, :lo12:.L_MergedGlobals
; CHECK-O3-NEXT: ldp w9, w8, [x8]
; CHECK-O3-NEXT: add w0, w8, w9
; CHECK-O3-NEXT: ret
entry:
%0 = load i32, ptr @global0, align 4
%1 = load i32, ptr @global1, align 4
%add = add nsw i32 %1, %0
ret i32 %add
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-O2: {{.*}}
; CHECK-O3: {{.*}}
; CHECK: {{.*}}
9 changes: 4 additions & 5 deletions llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
; CHECK-ASM-NEXT: cbz x9, .LBB0_5
; CHECK-ASM-NEXT: // %bb.2:
; CHECK-ASM-NEXT: ldrsw x9, [sp, #8]
; CHECK-ASM-NEXT: adrp x10, B
; CHECK-ASM-NEXT: add x10, x10, :lo12:B
; CHECK-ASM-NEXT: adrp x10, .L_MergedGlobals
; CHECK-ASM-NEXT: add x10, x10, :lo12:.L_MergedGlobals
; CHECK-ASM-NEXT: strb wzr, [x10, x8]
; CHECK-ASM-NEXT: cmp x9, #10
; CHECK-ASM-NEXT: b.hi .LBB0_6
Expand All @@ -36,9 +36,8 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
; CHECK-ASM-NEXT: sub x8, x8, x9
; CHECK-ASM-NEXT: cbz x8, .LBB0_6
; CHECK-ASM-NEXT: // %bb.4:
; CHECK-ASM-NEXT: adrp x8, B2
; CHECK-ASM-NEXT: add x8, x8, :lo12:B2
; CHECK-ASM-NEXT: strb wzr, [x8, x9]
; CHECK-ASM-NEXT: add x8, x10, x9
; CHECK-ASM-NEXT: strb wzr, [x8, #10]
; CHECK-ASM-NEXT: add sp, sp, #16
; CHECK-ASM-NEXT: .cfi_def_cfa_offset 0
; CHECK-ASM-NEXT: ret
Expand Down
Loading