Skip to content

Commit a8ad1f9

Browse files
committed
[CodeGen] Prevent register coalescer rematerialization based on target
This change makes the register coalescer prevent rematerialization of a trivial def for a move instruction if the target advises against it, based on the new target hook `shouldReMaterializeTrivialRegDef`. The filter is appended to the existing logic. The patch includes isolated MIR tests for all supported register classes, and fixes existing tests.
1 parent 1d33c7f commit a8ad1f9

File tree

6 files changed

+320
-49
lines changed

6 files changed

+320
-49
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ STATISTIC(numCrossRCs, "Number of cross class joins performed");
6969
STATISTIC(numCommutes, "Number of instruction commuting performed");
7070
STATISTIC(numExtends, "Number of copies extended");
7171
STATISTIC(NumReMats, "Number of instructions re-materialized");
72+
STATISTIC(NumReMatsPrevented,
73+
"Number of instruction rematerialization prevented by "
74+
"`shouldReMaterializeTrivialRegDef` hook");
7275
STATISTIC(NumInflated, "Number of register classes inflated");
7376
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
7477
STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved");
@@ -1400,6 +1403,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
14001403
if (!Edit.canRematerializeAt(RM, ValNo, CopyIdx))
14011404
return false;
14021405

1406+
if (!TII->shouldReMaterializeTrivialRegDef(CopyMI, DstReg, SrcReg)) {
1407+
LLVM_DEBUG(dbgs() << "Remat prevented: " << CopyIdx << "\t" << *CopyMI);
1408+
++NumReMatsPrevented;
1409+
return false;
1410+
}
1411+
14031412
DebugLoc DL = CopyMI->getDebugLoc();
14041413
MachineBasicBlock *MBB = CopyMI->getParent();
14051414
MachineBasicBlock::iterator MII =

llvm/test/CodeGen/AArch64/arm64-abi-varargs.ll

Lines changed: 12 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -64,18 +64,18 @@ define i32 @main() nounwind ssp {
6464
; CHECK: ; %bb.0:
6565
; CHECK-NEXT: sub sp, sp, #96
6666
; CHECK-NEXT: stp x29, x30, [sp, #80] ; 16-byte Folded Spill
67-
; CHECK-NEXT: mov w9, #1 ; =0x1
68-
; CHECK-NEXT: mov w8, #2 ; =0x2
69-
; CHECK-NEXT: stp w8, w9, [sp, #72]
70-
; CHECK-NEXT: mov w9, #3 ; =0x3
71-
; CHECK-NEXT: mov w8, #4 ; =0x4
72-
; CHECK-NEXT: stp w8, w9, [sp, #64]
73-
; CHECK-NEXT: mov w9, #5 ; =0x5
74-
; CHECK-NEXT: mov w8, #6 ; =0x6
75-
; CHECK-NEXT: stp w8, w9, [sp, #56]
76-
; CHECK-NEXT: mov w9, #7 ; =0x7
77-
; CHECK-NEXT: mov w8, #8 ; =0x8
78-
; CHECK-NEXT: stp w8, w9, [sp, #48]
67+
; CHECK-NEXT: mov w8, #1 ; =0x1
68+
; CHECK-NEXT: mov w1, #2 ; =0x2
69+
; CHECK-NEXT: stp w1, w8, [sp, #72]
70+
; CHECK-NEXT: mov w2, #3 ; =0x3
71+
; CHECK-NEXT: mov w3, #4 ; =0x4
72+
; CHECK-NEXT: stp w3, w2, [sp, #64]
73+
; CHECK-NEXT: mov w4, #5 ; =0x5
74+
; CHECK-NEXT: mov w5, #6 ; =0x6
75+
; CHECK-NEXT: stp w5, w4, [sp, #56]
76+
; CHECK-NEXT: mov w6, #7 ; =0x7
77+
; CHECK-NEXT: mov w7, #8 ; =0x8
78+
; CHECK-NEXT: stp w7, w6, [sp, #48]
7979
; CHECK-NEXT: mov w8, #9 ; =0x9
8080
; CHECK-NEXT: mov w9, #10 ; =0xa
8181
; CHECK-NEXT: stp w9, w8, [sp, #40]
@@ -86,13 +86,6 @@ define i32 @main() nounwind ssp {
8686
; CHECK-NEXT: str x9, [sp, #8]
8787
; CHECK-NEXT: str w8, [sp]
8888
; CHECK-NEXT: add x0, sp, #76
89-
; CHECK-NEXT: mov w1, #2 ; =0x2
90-
; CHECK-NEXT: mov w2, #3 ; =0x3
91-
; CHECK-NEXT: mov w3, #4 ; =0x4
92-
; CHECK-NEXT: mov w4, #5 ; =0x5
93-
; CHECK-NEXT: mov w5, #6 ; =0x6
94-
; CHECK-NEXT: mov w6, #7 ; =0x7
95-
; CHECK-NEXT: mov w7, #8 ; =0x8
9689
; CHECK-NEXT: bl _fn9
9790
; CHECK-NEXT: mov w0, #0 ; =0x0
9891
; CHECK-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload
Lines changed: 174 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,174 @@
1+
# RUN: llc -o - -mtriple=arm64-linux-gnu -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-LINUX
2+
# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=generic -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-APPLE
3+
# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=CPU
4+
# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-fpr64 -mattr=-zcm-fpr128 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTATTR
5+
# RUN: llc -o - -mtriple=arm64-apple-macosx -mattr=+zcm-fpr64 -mattr=+zcm-fpr128 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=ATTR
6+
7+
--- |
8+
define void @remat_FPR128() {
9+
ret void
10+
}
11+
declare void @foo_v4i32(<4 x float>, <4 x float>)
12+
13+
define void @remat_FPR64() {
14+
ret void
15+
}
16+
declare void @foo_double(double, double)
17+
18+
define void @remat_FPR32() {
19+
ret void
20+
}
21+
declare void @foo_float(float, float)
22+
23+
define void @remat_FPR16() {
24+
ret void
25+
}
26+
declare void @foo_half(half, half)
27+
...
28+
---
29+
name: remat_FPR128
30+
tracksRegLiveness: true
31+
body: |
32+
bb.0:
33+
; CHECK-LABEL: name: remat_FPR128
34+
35+
; NOTCPU-LINUX: %0:fpr128 = MOVIv2d_ns 64
36+
; NOTCPU-LINUX-NEXT: %1:fpr128 = MOVIv2d_ns 64
37+
; NOTCPU-LINUX: BL @foo_v4i32
38+
39+
; NOTCPU-APPLE: %0:fpr128 = MOVIv2d_ns 64
40+
; NOTCPU-APPLE-NEXT: %1:fpr128 = MOVIv2d_ns 64
41+
; NOTCPU-APPLE: BL @foo_v4i32
42+
43+
; CPU: %0:fpr128 = MOVIv2d_ns 64
44+
; CPU-NEXT: %1:fpr128 = COPY %0
45+
; CPU: BL @foo_v4i32
46+
47+
; NOTATTR: %0:fpr128 = MOVIv2d_ns 64
48+
; NOTATTR-NEXT: %1:fpr128 = MOVIv2d_ns 64
49+
; NOTATTR: BL @foo_v4i32
50+
51+
; ATTR: %0:fpr128 = MOVIv2d_ns 64
52+
; ATTR-NEXT: %1:fpr128 = COPY %0
53+
; ATTR: BL @foo_v4i32
54+
55+
%0:fpr128 = MOVIv2d_ns 64
56+
%1:fpr128 = COPY %0
57+
58+
; Creates a live range interference to prevent coalescing and force
59+
; trying to rematerialize the previous COPY.
60+
%1 = ADDv4i32 %1, %1
61+
62+
BL @foo_v4i32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
63+
RET_ReallyLR
64+
65+
---
66+
name: remat_FPR64
67+
tracksRegLiveness: true
68+
body: |
69+
bb.0:
70+
; CHECK-LABEL: name: remat_FPR64
71+
72+
; NOTCPU-LINUX: %0:fpr64 = FMOVDi 64
73+
; NOTCPU-LINUX-NEXT: %1:fpr64 = FMOVDi 64
74+
; NOTCPU-LINUX: BL @foo_double
75+
76+
; NOTCPU-APPLE: %0:fpr64 = FMOVDi 64
77+
; NOTCPU-APPLE-NEXT: %1:fpr64 = FMOVDi 64
78+
; NOTCPU-APPLE: BL @foo_double
79+
80+
; CPU: %0:fpr64 = FMOVDi 64
81+
; CPU-NEXT: %1:fpr64 = COPY %0
82+
; CPU: BL @foo_double
83+
84+
; NOTATTR: %0:fpr64 = FMOVDi 64
85+
; NOTATTR-NEXT: %1:fpr64 = FMOVDi 64
86+
; NOTATTR: BL @foo_double
87+
88+
; ATTR: %0:fpr64 = FMOVDi 64
89+
; ATTR-NEXT: %1:fpr64 = COPY %0
90+
; ATTR: BL @foo_double
91+
92+
%0:fpr64 = FMOVDi 64
93+
%1:fpr64 = COPY %0
94+
95+
; Creates a live range interference to prevent coalescing and force
96+
; trying to rematerialize the previous COPY.
97+
%1 = FADDDrr %1, %1, implicit $fpcr
98+
99+
BL @foo_double, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
100+
RET_ReallyLR
101+
102+
---
103+
name: remat_FPR32
104+
tracksRegLiveness: true
105+
body: |
106+
bb.0:
107+
; CHECK-LABEL: name: remat_FPR32
108+
109+
; NOTCPU-LINUX: %0:fpr32 = FMOVSi 64
110+
; NOTCPU-LINUX-NEXT: %1:fpr32 = FMOVSi 64
111+
; NOTCPU-LINUX: BL @foo_float
112+
113+
; NOTCPU-APPLE: %0:fpr32 = FMOVSi 64
114+
; NOTCPU-APPLE-NEXT: %1:fpr32 = FMOVSi 64
115+
; NOTCPU-APPLE: BL @foo_float
116+
117+
; CPU: %0:fpr32 = FMOVSi 64
118+
; CPU-NEXT: %1:fpr32 = COPY %0
119+
; CPU: BL @foo_float
120+
121+
; NOTATTR: %0:fpr32 = FMOVSi 64
122+
; NOTATTR-NEXT: %1:fpr32 = FMOVSi 64
123+
; NOTATTR: BL @foo_float
124+
125+
; ATTR: %0:fpr32 = FMOVSi 64
126+
; ATTR-NEXT: %1:fpr32 = COPY %0
127+
; ATTR: BL @foo_float
128+
129+
%0:fpr32 = FMOVSi 64
130+
%1:fpr32 = COPY %0
131+
132+
; Creates a live range interference to prevent coalescing and force
133+
; trying to rematerialize the previous COPY.
134+
%1 = FADDSrr %1, %1, implicit $fpcr
135+
136+
BL @foo_float, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
137+
RET_ReallyLR
138+
139+
---
140+
name: remat_FPR16
141+
tracksRegLiveness: true
142+
body: |
143+
bb.0:
144+
; CHECK-LABEL: name: remat_FPR16
145+
146+
; NOTCPU-LINUX: %0:fpr16 = FMOVHi 64
147+
; NOTCPU-LINUX-NEXT: %1:fpr16 = FMOVHi 64
148+
; NOTCPU-LINUX: BL @foo_half
149+
150+
; NOTCPU-APPLE: %0:fpr16 = FMOVHi 64
151+
; NOTCPU-APPLE-NEXT: %1:fpr16 = FMOVHi 64
152+
; NOTCPU-APPLE: BL @foo_half
153+
154+
; CPU: %0:fpr16 = FMOVHi 64
155+
; CPU-NEXT: %1:fpr16 = COPY %0
156+
; CPU: BL @foo_half
157+
158+
; NOTATTR: %0:fpr16 = FMOVHi 64
159+
; NOTATTR-NEXT: %1:fpr16 = FMOVHi 64
160+
; NOTATTR: BL @foo_half
161+
162+
; ATTR: %0:fpr16 = FMOVHi 64
163+
; ATTR-NEXT: %1:fpr16 = COPY %0
164+
; ATTR: BL @foo_half
165+
166+
%0:fpr16 = FMOVHi 64
167+
%1:fpr16 = COPY %0
168+
169+
; Creates a live range interference to prevent coalescing and force
170+
; trying to rematerialize the previous COPY.
171+
%1 = FADDHrr %1, %1, implicit $fpcr
172+
173+
BL @foo_half, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
174+
RET_ReallyLR
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# RUN: llc -o - -mtriple=arm64-linux-gnu -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-LINUX
2+
# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=generic -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTCPU-APPLE
3+
# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=CPU
4+
# RUN: llc -o - -mtriple=arm64-apple-macosx -mcpu=apple-m1 -mattr=-zcm-gpr64 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=NOTATTR
5+
# RUN: llc -o - -mtriple=arm64-apple-macosx -mattr=+zcm-gpr64 -run-pass=register-coalescer -verify-coalescing %s | FileCheck %s -check-prefixes=ATTR
6+
7+
--- |
8+
define void @remat_GPR32() {
9+
ret void
10+
}
11+
declare void @foo_i32(i32, i32)
12+
13+
define void @remat_GPR64() {
14+
ret void
15+
}
16+
declare void @foo_i64(i64, i64)
17+
...
18+
---
19+
name: remat_GPR32
20+
tracksRegLiveness: true
21+
body: |
22+
bb.0:
23+
; CHECK-LABEL: name: remat_GPR32
24+
25+
; NOTCPU-LINUX: %0:gpr32 = MOVi32imm 32
26+
; NOTCPU-LINUX-NEXT: %1:gpr32common = MOVi32imm 32
27+
; NOTCPU-LINUX: BL @foo_i32
28+
29+
; NOTCPU-APPLE: %0:gpr32 = MOVi32imm 32
30+
; NOTCPU-APPLE-NEXT: %1:gpr32common = MOVi32imm 32
31+
; NOTCPU-APPLE: BL @foo_i32
32+
33+
; CPU: %0:gpr32 = MOVi32imm 32
34+
; CPU-NEXT: %1:gpr32sp = COPY %0
35+
; CPU: BL @foo_i32
36+
37+
; NOTATTR: %0:gpr32 = MOVi32imm 32
38+
; NOTATTR-NEXT: %1:gpr32common = MOVi32imm 32
39+
; NOTATTR: BL @foo_i32
40+
41+
; ATTR: %0:gpr32 = MOVi32imm 32
42+
; ATTR-NEXT: %1:gpr32sp = COPY %0
43+
; ATTR: BL @foo_i32
44+
45+
%0:gpr32 = MOVi32imm 32
46+
%1:gpr32sp = COPY %0
47+
48+
; Creates a live range interference to prevent coalescing and force
49+
; trying to rematerialize the previous COPY.
50+
%1 = ADDWri %1, 1, 0
51+
52+
BL @foo_i32, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
53+
RET_ReallyLR
54+
55+
---
56+
name: remat_GPR64
57+
tracksRegLiveness: true
58+
body: |
59+
bb.0:
60+
; CHECK-LABEL: name: remat_GPR64
61+
62+
; NOTCPU-LINUX: %0:gpr64 = MOVi64imm 64
63+
; NOTCPU-LINUX-NEXT: %1:gpr64common = MOVi64imm 64
64+
; NOTCPU-LINUX: BL @foo_i64
65+
66+
; NOTCPU-APPLE: %0:gpr64 = MOVi64imm 64
67+
; NOTCPU-APPLE-NEXT: %1:gpr64common = MOVi64imm 64
68+
; NOTCPU-APPLE: BL @foo_i64
69+
70+
; CPU: %0:gpr64 = MOVi64imm 64
71+
; CPU-NEXT: %1:gpr64sp = COPY %0
72+
; CPU: BL @foo_i64
73+
74+
; NOTATTR: %0:gpr64 = MOVi64imm 64
75+
; NOTATTR-NEXT: %1:gpr64common = MOVi64imm 64
76+
; NOTATTR: BL @foo_i64
77+
78+
; ATTR: %0:gpr64 = MOVi64imm 64
79+
; ATTR-NEXT: %1:gpr64sp = COPY %0
80+
; ATTR: BL @foo_i64
81+
82+
%0:gpr64 = MOVi64imm 64
83+
%1:gpr64sp = COPY %0
84+
85+
; Creates a live range interference to prevent coalescing and force
86+
; trying to rematerialize the previous COPY.
87+
%1 = ADDXri %1, 1, 0
88+
89+
BL @foo_i64, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit %0, implicit %1
90+
RET_ReallyLR

llvm/test/CodeGen/AArch64/arm64-vshuffle.ll

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ define <8 x i1> @test1() {
44
; CHECK-LABEL: test1:
55
; CHECK: ; %bb.0: ; %entry
66
; CHECK-NEXT: movi.16b v0, #0
7+
; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
78
; CHECK-NEXT: ret
89
entry:
910
%Shuff = shufflevector <8 x i1> <i1 0, i1 1, i1 2, i1 3, i1 4, i1 5, i1 6,
@@ -58,9 +59,14 @@ bb:
5859
; CHECK: .byte 0 ; 0x0
5960
; CHECK: .byte 0 ; 0x0
6061
define <16 x i1> @test4(ptr %ptr, i32 %v) {
61-
; CHECK-LABEL: _test4:
62-
; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0@PAGE
63-
; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0@PAGEOFF]
62+
; CHECK-LABEL: test4:
63+
; CHECK: ; %bb.0: ; %bb
64+
; CHECK-NEXT: Lloh0:
65+
; CHECK-NEXT: adrp x8, lCPI3_0@PAGE
66+
; CHECK-NEXT: Lloh1:
67+
; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF]
68+
; CHECK-NEXT: ret
69+
; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
6470
bb:
6571
%Shuff = shufflevector <16 x i1> zeroinitializer,
6672
<16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1,

0 commit comments

Comments
 (0)