Skip to content

Commit b18fcde

Browse files
hstk30-hwaadeshps-mcw
authored and committed
[GlobalMerge] Prefer global-merge-max-offset over the target-specific constant offset. (llvm#165591)
In the Dhrystone benchmark, I found that some adjacent globals are not merged, whereas GCC's anchor optimization does work on them. Using global-merge-max-offset to set the maximum offset yields similar results (still slightly different, but at least the offset can be controlled).
1 parent 644aa40 commit b18fcde

File tree

3 files changed

+17
-8
lines changed

3 files changed

+17
-8
lines changed

llvm/lib/CodeGen/GlobalMerge.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -772,6 +772,9 @@ Pass *llvm::createGlobalMergePass(const TargetMachine *TM, unsigned Offset,
772772
bool MergeConstAggressive = GlobalMergeAllConst.getNumOccurrences() > 0
773773
? GlobalMergeAllConst
774774
: MergeConstAggressiveByDefault;
775-
return new GlobalMerge(TM, Offset, OnlyOptimizeForSize, MergeExternal,
775+
unsigned PreferOffset = GlobalMergeMaxOffset.getNumOccurrences() > 0
776+
? GlobalMergeMaxOffset
777+
: Offset;
778+
return new GlobalMerge(TM, PreferOffset, OnlyOptimizeForSize, MergeExternal,
776779
MergeConstant, MergeConstAggressive);
777780
}
Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 | FileCheck --check-prefix=NO-MERGE %s
2+
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O1 | FileCheck --check-prefix=NO-MERGE %s
3+
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O2 | FileCheck --check-prefix=NO-MERGE %s
4+
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O3 | FileCheck %s
5+
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O3 -global-merge-max-offset=0 | FileCheck %s --check-prefix=NO-MERGE
26
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -O0 -global-merge-on-external=true | FileCheck --check-prefix=NO-MERGE %s
37

48
; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
9+
; RUN: llc < %s -mtriple=aarch64-apple-ios -O1 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
10+
; RUN: llc < %s -mtriple=aarch64-apple-ios -O2 | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
11+
; RUN: llc < %s -mtriple=aarch64-apple-ios -O3 | FileCheck %s --check-prefix=CHECK-APPLE-IOS
512
; RUN: llc < %s -mtriple=aarch64-apple-ios -O0 -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-APPLE-IOS-NO-MERGE
613

7-
; FIXME: add O1/O2 test for aarch64-none-linux-gnu and aarch64-apple-ios
8-
914
@m = internal global i32 0, align 4
1015
@n = internal global i32 0, align 4
1116

1217
define void @f1(i32 %a1, i32 %a2) {
1318
; CHECK-LABEL: f1:
14-
; CHECK: adrp x{{[0-9]+}}, _MergedGlobals
19+
; CHECK: adrp x{{[0-9]+}}, .L_MergedGlobals
1520
; CHECK-NOT: adrp
1621

1722
; CHECK-APPLE-IOS-LABEL: f1:
@@ -22,9 +27,9 @@ define void @f1(i32 %a1, i32 %a2) {
2227
ret void
2328
}
2429

25-
; CHECK: .local _MergedGlobals
26-
; CHECK: .comm _MergedGlobals,8,8
30+
; CHECK: .local .L_MergedGlobals
31+
; CHECK: .comm .L_MergedGlobals,8,4
2732
; NO-MERGE-NOT: .local _MergedGlobals
2833

29-
; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,3
30-
; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,3
34+
; CHECK-APPLE-IOS: .zerofill __DATA,__bss,__MergedGlobals,8,2
35+
; CHECK-APPLE-IOS-NO-MERGE-NOT: .zerofill __DATA,__bss,__MergedGlobals,8,2

llvm/test/CodeGen/ARM/global-merge-1.ll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
; RUN: llc %s -O3 -o - | FileCheck -check-prefix=MERGE %s
88
; RUN: llc %s -O3 -o - -arm-global-merge=false | FileCheck -check-prefix=NO-MERGE %s
99
; RUN: llc %s -O3 -o - -arm-global-merge=true | FileCheck -check-prefix=MERGE %s
10+
; RUN: llc %s -O3 -o - -arm-global-merge=true -global-merge-max-offset=0 | FileCheck -check-prefix=NO-MERGE %s
1011

1112
; MERGE-NOT: .zerofill __DATA,__bss,_bar,20,2
1213
; MERGE-NOT: .zerofill __DATA,__bss,_baz,20,2

0 commit comments

Comments
 (0)