-
Notifications
You must be signed in to change notification settings - Fork 15.3k
CodeGen: Remove target hook for terminal rule #165962
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
CodeGen: Remove target hook for terminal rule #165962
Conversation
|
@llvm/pr-subscribers-backend-powerpc @llvm/pr-subscribers-llvm-regalloc Author: Matt Arsenault (arsenm) Changes: Enables the terminal rule for remaining targets. Patch is 44.58 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/165962.diff 22 Files Affected:
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 6e0bd701a191d..431478ad7a4d4 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -79,9 +79,9 @@ static cl::opt<bool> EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true), cl::Hidden);
-static cl::opt<cl::boolOrDefault>
- EnableTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"),
- cl::init(cl::BOU_UNSET), cl::Hidden);
+static cl::opt<bool> UseTerminalRule("terminal-rule",
+ cl::desc("Apply the terminal rule"),
+ cl::init(true), cl::Hidden);
/// Temporary flag to test critical edge unsplitting.
static cl::opt<bool> EnableJoinSplits(
@@ -134,7 +134,6 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
SlotIndexes *SI = nullptr;
const MachineLoopInfo *Loops = nullptr;
RegisterClassInfo RegClassInfo;
- bool UseTerminalRule = false;
/// Position and VReg of a PHI instruction during coalescing.
struct PHIValPos {
@@ -4313,11 +4312,6 @@ bool RegisterCoalescer::run(MachineFunction &fn) {
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
- if (EnableTerminalRule == cl::BOU_UNSET)
- UseTerminalRule = STI.enableTerminalRule();
- else
- UseTerminalRule = EnableTerminalRule == cl::BOU_TRUE;
-
// If there are PHIs tracked by debug-info, they will need updating during
// coalescing. Build an index of those PHIs to ease updating.
SlotIndexes *Slots = LIS->getSlotIndexes();
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index ab4004e30f629..8974965c41fe3 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -157,7 +157,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
bool enableSubRegLiveness() const override { return EnableSubregLiveness; }
- bool enableTerminalRule() const override { return true; }
+
bool enableMachinePipeliner() const override;
bool useDFAforSMS() const override { return false; }
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index da4bd878b8853..f377b8aaf1333 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1040,8 +1040,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return true;
}
- bool enableTerminalRule() const override { return true; }
-
bool useAA() const override;
bool enableSubRegLiveness() const override {
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index efd99dbc1a08b..22e56b66e1827 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -126,8 +126,6 @@ class R600Subtarget final : public R600GenSubtargetInfo,
return true;
}
- bool enableTerminalRule() const override { return true; }
-
bool enableSubRegLiveness() const override {
return true;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 34baa3108402c..4a0883cc662e7 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -377,7 +377,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
bool isRWPI() const;
bool useMachineScheduler() const { return UseMISched; }
- bool enableTerminalRule() const override { return true; }
bool useMachinePipeliner() const { return UseMIPipeliner; }
bool hasMinSize() const { return OptMinSize; }
bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index b3d5d010d378d..995f66d0551b4 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -295,8 +295,6 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;
- bool enableTerminalRule() const override { return true; }
-
// Always use the TargetLowering default scheduler.
// FIXME: This will use the vliw scheduler which is probably just hurting
// compiler time and will be removed eventually anyway.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 817fb7cb5c41a..4b4fc8f0d8e76 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -147,7 +147,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}
bool enableMachineScheduler() const override { return true; }
- bool enableTerminalRule() const override { return true; }
bool enablePostRAScheduler() const override { return UsePostRAScheduler; }
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 4f5aadca361fe..868f41375b96b 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -419,8 +419,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
- bool enableTerminalRule() const override { return true; }
-
bool enableEarlyIfConversion() const override;
void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
diff --git a/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll b/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll
index 895b68b5a9145..ce40085feb0d0 100644
--- a/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll
+++ b/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll
@@ -25,8 +25,7 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {
%11 = sub nsw i32 %7, %9
%12 = icmp slt i32 %10, %11
br i1 %12, label %5, label %13
-; CHECK: r1 = r3
-; CHECK: if r2 s> r3 goto -10 <test+0x40>
+; CHECK: if r2 s> r1 goto -10 <test+0x40>
; <label>:13: ; preds = %5, %2
%14 = phi i32 [ 0, %2 ], [ %9, %5 ]
diff --git a/llvm/test/CodeGen/NVPTX/atomics-b128.ll b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
index b2a3f94d11a16..3057e91e8ebe4 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-b128.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
@@ -756,24 +756,24 @@ define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB34_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: and.b64 %rd6, %rd11, %rd4;
-; CHECK-NEXT: and.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: and.b64 %rd6, %rd1, %rd4;
+; CHECK-NEXT: and.b64 %rd7, %rd2, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB34_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw and ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -791,24 +791,24 @@ define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB35_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: or.b64 %rd6, %rd11, %rd4;
-; CHECK-NEXT: or.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: or.b64 %rd6, %rd1, %rd4;
+; CHECK-NEXT: or.b64 %rd7, %rd2, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB35_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw or ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -826,24 +826,24 @@ define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB36_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: xor.b64 %rd6, %rd11, %rd4;
-; CHECK-NEXT: xor.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd6, %rd1, %rd4;
+; CHECK-NEXT: xor.b64 %rd7, %rd2, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB36_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw xor ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -861,29 +861,29 @@ define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB37_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
-; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
-; CHECK-NEXT: setp.lt.s64 %p4, %rd12, %rd5;
+; CHECK-NEXT: setp.lt.s64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
-; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB37_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw min ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -901,29 +901,29 @@ define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB38_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
-; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
-; CHECK-NEXT: setp.gt.s64 %p4, %rd12, %rd5;
+; CHECK-NEXT: setp.gt.s64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
-; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB38_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw max ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -941,29 +941,29 @@ define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB39_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
-; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
-; CHECK-NEXT: setp.lt.u64 %p4, %rd12, %rd5;
+; CHECK-NEXT: setp.lt.u64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
-; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB39_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw umin ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -981,29 +981,29 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB40_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
-; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
-; CHECK-NEXT: setp.gt.u64 %p4, %rd12, %rd5;
+; CHECK-NEXT: setp.gt.u64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
-; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB40_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw umax ptr %ptr, i128 %val monotonic
ret i128 %ret
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index e2762bac45a35..313be95c03192 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -63,32 +63,32 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: ld.b32 %r46, [%r1];
; CHECKPTX62-NEXT: $L__BB0_1: // %atomicrmw.start45
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECKPTX62-NEXT: shr.u32 %r20, %r46, %r2;
+; CHECKPTX62-NEXT: mov.b32 %r4, %r46;
+; CHECKPTX62-NEXT: shr.u32 %r20, %r4, %r2;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs2, %r20;
; CHECKPTX62-NEXT: add.rn.f16 %rs3, %rs2, %rs1;
; CHECKPTX62-NEXT: cvt.u32.u16 %r21, %rs3;
; CHECKPTX62-NEXT: shl.b32 %r22, %r21, %r2;
-; CHECKPTX62-NEXT: and.b32 %r23, %r46, %r3;
+; CHECKPTX62-NEXT: and.b32 %r23, %r4, %r3;
; CHECKPTX62-NEXT: or.b32 %r24, %r23, %r22;
-; CHECKPTX62-NEXT: atom.relaxed.sys.cas.b32 %r4, [%r1], %r46, %r24;
-; CHECKPTX62-NEXT: setp.ne.b32 %p1, %r4, %r46;
-; CHECKPTX62-NEXT: mov.b32 %r46, %r4;
+; CHECKPTX62-NEXT: atom.relaxed.sys.cas.b32 %r46, [%r1], %r4, %r24;
+; CHECKPTX62-NEXT: setp.ne.b32 %p1, %r46, %r4;
; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX62-NEXT: ld.b32 %r47, [%r1];
; CHECKPTX62-NEXT: $L__BB0_3: // %atomicrmw.start27
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECKPTX62-NEXT: shr.u32 %r25, %r47, %r2;
+; CHECKPTX62-NEXT: ...
[truncated]
|
qcolombet
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
655709c to
703fc9f
Compare
4222347 to
68599da
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's more reg-reg moves occurring after this change, which is rather unfortunate.
Enables the terminal rule for remaining targets
68599da to
f295da1
Compare
Enables the terminal rule for remaining targets
|
Can we add a target-specific hook to disable it?
What do you mean by failure? Failures are just bugs, and not a reason to maintain points of optimization configurability.

Enables the terminal rule for remaining targets