Skip to content

Commit c8e1e32

Browse files
committed
Add target hook and only enable by default for AMDGPU
1 parent d67103e commit c8e1e32

File tree

96 files changed

+5618
-5524
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

96 files changed

+5618
-5524
lines changed

llvm/include/llvm/CodeGen/TargetSubtargetInfo.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,10 @@ class LLVM_ABI TargetSubtargetInfo : public MCSubtargetInfo {
210210
/// can be overridden.
211211
virtual bool enableJoinGlobalCopies() const;
212212

213+
/// Hack to bring up the option. This should be unconditionally true; all targets
214+
/// should enable it, after which this hook should be deleted.
215+
virtual bool enableTerminalRule() const { return false; }
216+
213217
/// True if the subtarget should run a scheduler after register allocation.
214218
///
215219
/// By default this queries the PostRAScheduling bit in the scheduling model

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -79,9 +79,9 @@ static cl::opt<bool> EnableJoining("join-liveintervals",
7979
cl::desc("Coalesce copies (default=true)"),
8080
cl::init(true), cl::Hidden);
8181

82-
static cl::opt<bool> UseTerminalRule("terminal-rule",
83-
cl::desc("Apply the terminal rule"),
84-
cl::init(true), cl::Hidden);
82+
static cl::opt<cl::boolOrDefault>
83+
EnableTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"),
84+
cl::init(cl::BOU_UNSET), cl::Hidden);
8585

8686
/// Temporary flag to test critical edge unsplitting.
8787
static cl::opt<bool> EnableJoinSplits(
@@ -134,6 +134,7 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
134134
SlotIndexes *SI = nullptr;
135135
const MachineLoopInfo *Loops = nullptr;
136136
RegisterClassInfo RegClassInfo;
137+
bool UseTerminalRule = false;
137138

138139
/// Position and VReg of a PHI instruction during coalescing.
139140
struct PHIValPos {
@@ -4312,6 +4313,11 @@ bool RegisterCoalescer::run(MachineFunction &fn) {
43124313
else
43134314
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
43144315

4316+
if (EnableTerminalRule == cl::BOU_UNSET)
4317+
UseTerminalRule = STI.enableTerminalRule();
4318+
else
4319+
UseTerminalRule = EnableTerminalRule == cl::BOU_TRUE;
4320+
43154321
// If there are PHIs tracked by debug-info, they will need updating during
43164322
// coalescing. Build an index of those PHIs to ease updating.
43174323
SlotIndexes *Slots = LIS->getSlotIndexes();

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1040,6 +1040,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
10401040
return true;
10411041
}
10421042

1043+
bool enableTerminalRule() const override { return true; }
1044+
10431045
bool useAA() const override;
10441046

10451047
bool enableSubRegLiveness() const override {

llvm/lib/Target/AMDGPU/R600Subtarget.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,8 @@ class R600Subtarget final : public R600GenSubtargetInfo,
126126
return true;
127127
}
128128

129+
bool enableTerminalRule() const override { return true; }
130+
129131
bool enableSubRegLiveness() const override {
130132
return true;
131133
}

llvm/test/CodeGen/AArch64/build-vector-two-dup.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -188,11 +188,11 @@ entry:
188188
define <8 x i8> @test11(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
189189
; CHECK-LABEL: test11:
190190
; CHECK: // %bb.0: // %entry
191-
; CHECK-NEXT: ld1r { v0.8b }, [x0]
192-
; CHECK-NEXT: ld1r { v1.8b }, [x1]
193-
; CHECK-NEXT: fmov d2, d0
194-
; CHECK-NEXT: mov v0.h[2], v1.h[0]
195-
; CHECK-NEXT: mov v0.h[3], v2.h[0]
191+
; CHECK-NEXT: ld1r { v1.8b }, [x0]
192+
; CHECK-NEXT: ld1r { v2.8b }, [x1]
193+
; CHECK-NEXT: mov v0.16b, v1.16b
194+
; CHECK-NEXT: mov v0.h[2], v2.h[0]
195+
; CHECK-NEXT: mov v0.h[3], v1.h[0]
196196
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
197197
; CHECK-NEXT: ret
198198
entry:

llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll

Lines changed: 24 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -20,17 +20,20 @@ define i32 @sink_load_and_copy(i32 %n) {
2020
; CHECK-NEXT: b.lt .LBB0_3
2121
; CHECK-NEXT: // %bb.1: // %for.body.preheader
2222
; CHECK-NEXT: adrp x8, A
23-
; CHECK-NEXT: mov w21, w19
24-
; CHECK-NEXT: ldr w20, [x8, :lo12:A]
23+
; CHECK-NEXT: mov w20, w19
24+
; CHECK-NEXT: ldr w21, [x8, :lo12:A]
2525
; CHECK-NEXT: .LBB0_2: // %for.body
2626
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
27-
; CHECK-NEXT: mov w0, w20
27+
; CHECK-NEXT: mov w0, w21
2828
; CHECK-NEXT: bl _Z3usei
29-
; CHECK-NEXT: sdiv w19, w19, w0
30-
; CHECK-NEXT: subs w21, w21, #1
29+
; CHECK-NEXT: sdiv w20, w20, w0
30+
; CHECK-NEXT: subs w19, w19, #1
3131
; CHECK-NEXT: b.ne .LBB0_2
32-
; CHECK-NEXT: .LBB0_3: // %for.cond.cleanup
33-
; CHECK-NEXT: mov w0, w19
32+
; CHECK-NEXT: b .LBB0_4
33+
; CHECK-NEXT: .LBB0_3:
34+
; CHECK-NEXT: mov w20, w19
35+
; CHECK-NEXT: .LBB0_4: // %for.cond.cleanup
36+
; CHECK-NEXT: mov w0, w20
3437
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
3538
; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
3639
; CHECK-NEXT: ret
@@ -79,12 +82,15 @@ define i32 @cant_sink_successive_call(i32 %n) {
7982
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
8083
; CHECK-NEXT: mov w0, w20
8184
; CHECK-NEXT: bl _Z3usei
82-
; CHECK-NEXT: sdiv w19, w19, w0
83-
; CHECK-NEXT: subs w21, w21, #1
85+
; CHECK-NEXT: sdiv w21, w21, w0
86+
; CHECK-NEXT: subs w19, w19, #1
8487
; CHECK-NEXT: b.ne .LBB1_2
85-
; CHECK-NEXT: .LBB1_3: // %for.cond.cleanup
86-
; CHECK-NEXT: mov w0, w19
88+
; CHECK-NEXT: b .LBB1_4
89+
; CHECK-NEXT: .LBB1_3:
90+
; CHECK-NEXT: mov w21, w19
91+
; CHECK-NEXT: .LBB1_4: // %for.cond.cleanup
8792
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
93+
; CHECK-NEXT: mov w0, w21
8894
; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
8995
; CHECK-NEXT: ret
9096
entry:
@@ -133,12 +139,15 @@ define i32 @cant_sink_successive_store(ptr nocapture readnone %store, i32 %n) {
133139
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
134140
; CHECK-NEXT: mov w0, w20
135141
; CHECK-NEXT: bl _Z3usei
136-
; CHECK-NEXT: sdiv w19, w19, w0
137-
; CHECK-NEXT: subs w21, w21, #1
142+
; CHECK-NEXT: sdiv w21, w21, w0
143+
; CHECK-NEXT: subs w19, w19, #1
138144
; CHECK-NEXT: b.ne .LBB2_2
139-
; CHECK-NEXT: .LBB2_3: // %for.cond.cleanup
140-
; CHECK-NEXT: mov w0, w19
145+
; CHECK-NEXT: b .LBB2_4
146+
; CHECK-NEXT: .LBB2_3:
147+
; CHECK-NEXT: mov w21, w19
148+
; CHECK-NEXT: .LBB2_4: // %for.cond.cleanup
141149
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
150+
; CHECK-NEXT: mov w0, w21
142151
; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload
143152
; CHECK-NEXT: ret
144153
entry:

llvm/test/CodeGen/AArch64/machine-sink-kill-flags.ll

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -16,12 +16,13 @@ define i32 @test(ptr %ptr) {
1616
; CHECK-NEXT: mov w9, wzr
1717
; CHECK-NEXT: LBB0_1: ; %.thread
1818
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
19+
; CHECK-NEXT: lsr w11, w9, #1
1920
; CHECK-NEXT: sub w10, w9, #1
20-
; CHECK-NEXT: lsr w9, w9, #1
21+
; CHECK-NEXT: mov w9, w11
2122
; CHECK-NEXT: tbnz w10, #0, LBB0_1
2223
; CHECK-NEXT: ; %bb.2: ; %bb343
2324
; CHECK-NEXT: and w9, w10, #0x1
24-
; CHECK-NEXT: mov w0, #-1 ; =0xffffffff
25+
; CHECK-NEXT: mov w0, #-1
2526
; CHECK-NEXT: str w9, [x8]
2627
; CHECK-NEXT: ret
2728
bb:

0 commit comments

Comments
 (0)