-
Notifications
You must be signed in to change notification settings - Fork 15k
CodeGen: Remove target hook for terminal rule #165962
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: users/arsenm/riscv/enable-terminal-rule
Are you sure you want to change the base?
CodeGen: Remove target hook for terminal rule #165962
Conversation
Enables the terminal rule for remaining targets
|
Warning: This pull request is not mergeable via GitHub because a downstack PR is open. Once all requirements are satisfied, merge this PR as a stack on Graphite.
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-llvm-regalloc Author: Matt Arsenault (arsenm). Changes: Enables the terminal rule for remaining targets. Patch is 44.58 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/165962.diff — 22 Files Affected:
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 6e0bd701a191d..431478ad7a4d4 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -79,9 +79,9 @@ static cl::opt<bool> EnableJoining("join-liveintervals",
cl::desc("Coalesce copies (default=true)"),
cl::init(true), cl::Hidden);
-static cl::opt<cl::boolOrDefault>
- EnableTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"),
- cl::init(cl::BOU_UNSET), cl::Hidden);
+static cl::opt<bool> UseTerminalRule("terminal-rule",
+ cl::desc("Apply the terminal rule"),
+ cl::init(true), cl::Hidden);
/// Temporary flag to test critical edge unsplitting.
static cl::opt<bool> EnableJoinSplits(
@@ -134,7 +134,6 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
SlotIndexes *SI = nullptr;
const MachineLoopInfo *Loops = nullptr;
RegisterClassInfo RegClassInfo;
- bool UseTerminalRule = false;
/// Position and VReg of a PHI instruction during coalescing.
struct PHIValPos {
@@ -4313,11 +4312,6 @@ bool RegisterCoalescer::run(MachineFunction &fn) {
else
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
- if (EnableTerminalRule == cl::BOU_UNSET)
- UseTerminalRule = STI.enableTerminalRule();
- else
- UseTerminalRule = EnableTerminalRule == cl::BOU_TRUE;
-
// If there are PHIs tracked by debug-info, they will need updating during
// coalescing. Build an index of those PHIs to ease updating.
SlotIndexes *Slots = LIS->getSlotIndexes();
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index ab4004e30f629..8974965c41fe3 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -157,7 +157,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
bool enableSubRegLiveness() const override { return EnableSubregLiveness; }
- bool enableTerminalRule() const override { return true; }
+
bool enableMachinePipeliner() const override;
bool useDFAforSMS() const override { return false; }
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index da4bd878b8853..f377b8aaf1333 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1040,8 +1040,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return true;
}
- bool enableTerminalRule() const override { return true; }
-
bool useAA() const override;
bool enableSubRegLiveness() const override {
diff --git a/llvm/lib/Target/AMDGPU/R600Subtarget.h b/llvm/lib/Target/AMDGPU/R600Subtarget.h
index efd99dbc1a08b..22e56b66e1827 100644
--- a/llvm/lib/Target/AMDGPU/R600Subtarget.h
+++ b/llvm/lib/Target/AMDGPU/R600Subtarget.h
@@ -126,8 +126,6 @@ class R600Subtarget final : public R600GenSubtargetInfo,
return true;
}
- bool enableTerminalRule() const override { return true; }
-
bool enableSubRegLiveness() const override {
return true;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 34baa3108402c..4a0883cc662e7 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -377,7 +377,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
bool isRWPI() const;
bool useMachineScheduler() const { return UseMISched; }
- bool enableTerminalRule() const override { return true; }
bool useMachinePipeliner() const { return UseMIPipeliner; }
bool hasMinSize() const { return OptMinSize; }
bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index b3d5d010d378d..995f66d0551b4 100644
--- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -295,8 +295,6 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;
- bool enableTerminalRule() const override { return true; }
-
// Always use the TargetLowering default scheduler.
// FIXME: This will use the vliw scheduler which is probably just hurting
// compiler time and will be removed eventually anyway.
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 817fb7cb5c41a..4b4fc8f0d8e76 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -147,7 +147,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}
bool enableMachineScheduler() const override { return true; }
- bool enableTerminalRule() const override { return true; }
bool enablePostRAScheduler() const override { return UsePostRAScheduler; }
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 4f5aadca361fe..868f41375b96b 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -419,8 +419,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
- bool enableTerminalRule() const override { return true; }
-
bool enableEarlyIfConversion() const override;
void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>
diff --git a/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll b/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll
index 895b68b5a9145..ce40085feb0d0 100644
--- a/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll
+++ b/llvm/test/CodeGen/BPF/objdump_cond_op_2.ll
@@ -25,8 +25,7 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {
%11 = sub nsw i32 %7, %9
%12 = icmp slt i32 %10, %11
br i1 %12, label %5, label %13
-; CHECK: r1 = r3
-; CHECK: if r2 s> r3 goto -10 <test+0x40>
+; CHECK: if r2 s> r1 goto -10 <test+0x40>
; <label>:13: ; preds = %5, %2
%14 = phi i32 [ 0, %2 ], [ %9, %5 ]
diff --git a/llvm/test/CodeGen/NVPTX/atomics-b128.ll b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
index b2a3f94d11a16..3057e91e8ebe4 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-b128.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-b128.ll
@@ -756,24 +756,24 @@ define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB34_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: and.b64 %rd6, %rd11, %rd4;
-; CHECK-NEXT: and.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: and.b64 %rd6, %rd1, %rd4;
+; CHECK-NEXT: and.b64 %rd7, %rd2, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB34_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw and ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -791,24 +791,24 @@ define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB35_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: or.b64 %rd6, %rd11, %rd4;
-; CHECK-NEXT: or.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: or.b64 %rd6, %rd1, %rd4;
+; CHECK-NEXT: or.b64 %rd7, %rd2, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB35_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw or ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -826,24 +826,24 @@ define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB36_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: xor.b64 %rd6, %rd11, %rd4;
-; CHECK-NEXT: xor.b64 %rd7, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd6, %rd1, %rd4;
+; CHECK-NEXT: xor.b64 %rd7, %rd2, %rd5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p1 bra $L__BB36_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw xor ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -861,29 +861,29 @@ define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB37_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
-; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
-; CHECK-NEXT: setp.lt.s64 %p4, %rd12, %rd5;
+; CHECK-NEXT: setp.lt.s64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
-; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB37_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw min ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -901,29 +901,29 @@ define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB38_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
-; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
-; CHECK-NEXT: setp.gt.s64 %p4, %rd12, %rd5;
+; CHECK-NEXT: setp.gt.s64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
-; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB38_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw max ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -941,29 +941,29 @@ define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB39_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
-; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
-; CHECK-NEXT: setp.lt.u64 %p4, %rd12, %rd5;
+; CHECK-NEXT: setp.lt.u64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
-; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB39_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw umin ptr %ptr, i128 %val monotonic
ret i128 %ret
@@ -981,29 +981,29 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
; CHECK-NEXT: $L__BB40_1: // %atomicrmw.start
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
-; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
+; CHECK-NEXT: mov.b64 %rd2, %rd12;
+; CHECK-NEXT: mov.b64 %rd1, %rd11;
+; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
+; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
; CHECK-NEXT: and.pred %p3, %p2, %p1;
-; CHECK-NEXT: setp.gt.u64 %p4, %rd12, %rd5;
+; CHECK-NEXT: setp.gt.u64 %p4, %rd2, %rd5;
; CHECK-NEXT: or.pred %p5, %p3, %p4;
-; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
-; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
+; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
+; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
-; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
+; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
-; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
+; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
; CHECK-NEXT: }
-; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
-; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
+; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
+; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
-; CHECK-NEXT: mov.b64 %rd11, %rd1;
-; CHECK-NEXT: mov.b64 %rd12, %rd2;
; CHECK-NEXT: @%p6 bra $L__BB40_1;
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
-; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
+; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
; CHECK-NEXT: ret;
%ret = atomicrmw umax ptr %ptr, i128 %val monotonic
ret i128 %ret
diff --git a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
index e2762bac45a35..313be95c03192 100644
--- a/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
+++ b/llvm/test/CodeGen/NVPTX/atomics-sm70.ll
@@ -63,32 +63,32 @@ define void @test(ptr %dp0, ptr addrspace(1) %dp1, ptr addrspace(3) %dp3, half %
; CHECKPTX62-NEXT: ld.b32 %r46, [%r1];
; CHECKPTX62-NEXT: $L__BB0_1: // %atomicrmw.start45
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECKPTX62-NEXT: shr.u32 %r20, %r46, %r2;
+; CHECKPTX62-NEXT: mov.b32 %r4, %r46;
+; CHECKPTX62-NEXT: shr.u32 %r20, %r4, %r2;
; CHECKPTX62-NEXT: cvt.u16.u32 %rs2, %r20;
; CHECKPTX62-NEXT: add.rn.f16 %rs3, %rs2, %rs1;
; CHECKPTX62-NEXT: cvt.u32.u16 %r21, %rs3;
; CHECKPTX62-NEXT: shl.b32 %r22, %r21, %r2;
-; CHECKPTX62-NEXT: and.b32 %r23, %r46, %r3;
+; CHECKPTX62-NEXT: and.b32 %r23, %r4, %r3;
; CHECKPTX62-NEXT: or.b32 %r24, %r23, %r22;
-; CHECKPTX62-NEXT: atom.relaxed.sys.cas.b32 %r4, [%r1], %r46, %r24;
-; CHECKPTX62-NEXT: setp.ne.b32 %p1, %r4, %r46;
-; CHECKPTX62-NEXT: mov.b32 %r46, %r4;
+; CHECKPTX62-NEXT: atom.relaxed.sys.cas.b32 %r46, [%r1], %r4, %r24;
+; CHECKPTX62-NEXT: setp.ne.b32 %p1, %r46, %r4;
; CHECKPTX62-NEXT: @%p1 bra $L__BB0_1;
; CHECKPTX62-NEXT: // %bb.2: // %atomicrmw.end44
; CHECKPTX62-NEXT: ld.b32 %r47, [%r1];
; CHECKPTX62-NEXT: $L__BB0_3: // %atomicrmw.start27
; CHECKPTX62-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECKPTX62-NEXT: shr.u32 %r25, %r47, %r2;
+; CHECKPTX62-NEXT: ...
[truncated]
|

Enables the terminal rule for remaining targets