Skip to content

Commit 4222347

Browse files
committed
CodeGen: Remove target hook for terminal rule
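Enables the terminal rule by default for all remaining targets: the per-target enableTerminalRule() subtarget hook is removed, and the hidden -terminal-rule option becomes a plain boolean defaulting to true, so it can still be used to turn the rule off.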
1 parent 655709c commit 4222347

22 files changed

+207
-226
lines changed

llvm/lib/CodeGen/RegisterCoalescer.cpp

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ static cl::opt<bool> EnableJoining("join-liveintervals",
7979
cl::desc("Coalesce copies (default=true)"),
8080
cl::init(true), cl::Hidden);
8181

82-
static cl::opt<cl::boolOrDefault>
83-
EnableTerminalRule("terminal-rule", cl::desc("Apply the terminal rule"),
84-
cl::init(cl::BOU_UNSET), cl::Hidden);
82+
static cl::opt<bool> UseTerminalRule("terminal-rule",
83+
cl::desc("Apply the terminal rule"),
84+
cl::init(true), cl::Hidden);
8585

8686
/// Temporary flag to test critical edge unsplitting.
8787
static cl::opt<bool> EnableJoinSplits(
@@ -134,7 +134,6 @@ class RegisterCoalescer : private LiveRangeEdit::Delegate {
134134
SlotIndexes *SI = nullptr;
135135
const MachineLoopInfo *Loops = nullptr;
136136
RegisterClassInfo RegClassInfo;
137-
bool UseTerminalRule = false;
138137

139138
/// Position and VReg of a PHI instruction during coalescing.
140139
struct PHIValPos {
@@ -4313,11 +4312,6 @@ bool RegisterCoalescer::run(MachineFunction &fn) {
43134312
else
43144313
JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
43154314

4316-
if (EnableTerminalRule == cl::BOU_UNSET)
4317-
UseTerminalRule = STI.enableTerminalRule();
4318-
else
4319-
UseTerminalRule = EnableTerminalRule == cl::BOU_TRUE;
4320-
43214315
// If there are PHIs tracked by debug-info, they will need updating during
43224316
// coalescing. Build an index of those PHIs to ease updating.
43234317
SlotIndexes *Slots = LIS->getSlotIndexes();

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
157157
bool enableMachineScheduler() const override { return true; }
158158
bool enablePostRAScheduler() const override { return usePostRAScheduler(); }
159159
bool enableSubRegLiveness() const override { return EnableSubregLiveness; }
160-
bool enableTerminalRule() const override { return true; }
160+
161161
bool enableMachinePipeliner() const override;
162162
bool useDFAforSMS() const override { return false; }
163163

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1040,8 +1040,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
10401040
return true;
10411041
}
10421042

1043-
bool enableTerminalRule() const override { return true; }
1044-
10451043
bool useAA() const override;
10461044

10471045
bool enableSubRegLiveness() const override {

llvm/lib/Target/AMDGPU/R600Subtarget.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,6 @@ class R600Subtarget final : public R600GenSubtargetInfo,
126126
return true;
127127
}
128128

129-
bool enableTerminalRule() const override { return true; }
130-
131129
bool enableSubRegLiveness() const override {
132130
return true;
133131
}

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
377377
bool isRWPI() const;
378378

379379
bool useMachineScheduler() const { return UseMISched; }
380-
bool enableTerminalRule() const override { return true; }
381380
bool useMachinePipeliner() const { return UseMIPipeliner; }
382381
bool hasMinSize() const { return OptMinSize; }
383382
bool isThumb1Only() const { return isThumb() && !hasThumb2(); }

llvm/lib/Target/Hexagon/HexagonSubtarget.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,8 +295,6 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
295295
bool useBSBScheduling() const { return UseBSBScheduling; }
296296
bool enableMachineScheduler() const override;
297297

298-
bool enableTerminalRule() const override { return true; }
299-
300298
// Always use the TargetLowering default scheduler.
301299
// FIXME: This will use the vliw scheduler which is probably just hurting
302300
// compiler time and will be removed eventually anyway.

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
147147
}
148148

149149
bool enableMachineScheduler() const override { return true; }
150-
bool enableTerminalRule() const override { return true; }
151150

152151
bool enablePostRAScheduler() const override { return UsePostRAScheduler; }
153152

llvm/lib/Target/X86/X86Subtarget.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -419,8 +419,6 @@ class X86Subtarget final : public X86GenSubtargetInfo {
419419
/// Enable the MachineScheduler pass for all X86 subtargets.
420420
bool enableMachineScheduler() const override { return true; }
421421

422-
bool enableTerminalRule() const override { return true; }
423-
424422
bool enableEarlyIfConversion() const override;
425423

426424
void getPostRAMutations(std::vector<std::unique_ptr<ScheduleDAGMutation>>

llvm/test/CodeGen/BPF/objdump_cond_op_2.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ define i32 @test(i32, i32) local_unnamed_addr #0 {
2525
%11 = sub nsw i32 %7, %9
2626
%12 = icmp slt i32 %10, %11
2727
br i1 %12, label %5, label %13
28-
; CHECK: r1 = r3
29-
; CHECK: if r2 s> r3 goto -10 <test+0x40>
28+
; CHECK: if r2 s> r1 goto -10 <test+0x40>
3029

3130
; <label>:13: ; preds = %5, %2
3231
%14 = phi i32 [ 0, %2 ], [ %9, %5 ]

llvm/test/CodeGen/NVPTX/atomics-b128.ll

Lines changed: 75 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -756,24 +756,24 @@ define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) {
756756
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
757757
; CHECK-NEXT: $L__BB34_1: // %atomicrmw.start
758758
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
759-
; CHECK-NEXT: and.b64 %rd6, %rd11, %rd4;
760-
; CHECK-NEXT: and.b64 %rd7, %rd12, %rd5;
759+
; CHECK-NEXT: mov.b64 %rd2, %rd12;
760+
; CHECK-NEXT: mov.b64 %rd1, %rd11;
761+
; CHECK-NEXT: and.b64 %rd6, %rd1, %rd4;
762+
; CHECK-NEXT: and.b64 %rd7, %rd2, %rd5;
761763
; CHECK-NEXT: {
762764
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
763-
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
765+
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
764766
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
765767
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
766-
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
768+
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
767769
; CHECK-NEXT: }
768-
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
769-
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
770+
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
771+
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
770772
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
771773
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
772-
; CHECK-NEXT: mov.b64 %rd11, %rd1;
773-
; CHECK-NEXT: mov.b64 %rd12, %rd2;
774774
; CHECK-NEXT: @%p1 bra $L__BB34_1;
775775
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
776-
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
776+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
777777
; CHECK-NEXT: ret;
778778
%ret = atomicrmw and ptr %ptr, i128 %val monotonic
779779
ret i128 %ret
@@ -791,24 +791,24 @@ define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) {
791791
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
792792
; CHECK-NEXT: $L__BB35_1: // %atomicrmw.start
793793
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
794-
; CHECK-NEXT: or.b64 %rd6, %rd11, %rd4;
795-
; CHECK-NEXT: or.b64 %rd7, %rd12, %rd5;
794+
; CHECK-NEXT: mov.b64 %rd2, %rd12;
795+
; CHECK-NEXT: mov.b64 %rd1, %rd11;
796+
; CHECK-NEXT: or.b64 %rd6, %rd1, %rd4;
797+
; CHECK-NEXT: or.b64 %rd7, %rd2, %rd5;
796798
; CHECK-NEXT: {
797799
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
798-
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
800+
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
799801
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
800802
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
801-
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
803+
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
802804
; CHECK-NEXT: }
803-
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
804-
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
805+
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
806+
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
805807
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
806808
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
807-
; CHECK-NEXT: mov.b64 %rd11, %rd1;
808-
; CHECK-NEXT: mov.b64 %rd12, %rd2;
809809
; CHECK-NEXT: @%p1 bra $L__BB35_1;
810810
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
811-
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
811+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
812812
; CHECK-NEXT: ret;
813813
%ret = atomicrmw or ptr %ptr, i128 %val monotonic
814814
ret i128 %ret
@@ -826,24 +826,24 @@ define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) {
826826
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
827827
; CHECK-NEXT: $L__BB36_1: // %atomicrmw.start
828828
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
829-
; CHECK-NEXT: xor.b64 %rd6, %rd11, %rd4;
830-
; CHECK-NEXT: xor.b64 %rd7, %rd12, %rd5;
829+
; CHECK-NEXT: mov.b64 %rd2, %rd12;
830+
; CHECK-NEXT: mov.b64 %rd1, %rd11;
831+
; CHECK-NEXT: xor.b64 %rd6, %rd1, %rd4;
832+
; CHECK-NEXT: xor.b64 %rd7, %rd2, %rd5;
831833
; CHECK-NEXT: {
832834
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
833-
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
835+
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
834836
; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
835837
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
836-
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
838+
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
837839
; CHECK-NEXT: }
838-
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
839-
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
840+
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
841+
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
840842
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
841843
; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
842-
; CHECK-NEXT: mov.b64 %rd11, %rd1;
843-
; CHECK-NEXT: mov.b64 %rd12, %rd2;
844844
; CHECK-NEXT: @%p1 bra $L__BB36_1;
845845
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
846-
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
846+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
847847
; CHECK-NEXT: ret;
848848
%ret = atomicrmw xor ptr %ptr, i128 %val monotonic
849849
ret i128 %ret
@@ -861,29 +861,29 @@ define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) {
861861
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
862862
; CHECK-NEXT: $L__BB37_1: // %atomicrmw.start
863863
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
864-
; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
865-
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
864+
; CHECK-NEXT: mov.b64 %rd2, %rd12;
865+
; CHECK-NEXT: mov.b64 %rd1, %rd11;
866+
; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
867+
; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
866868
; CHECK-NEXT: and.pred %p3, %p2, %p1;
867-
; CHECK-NEXT: setp.lt.s64 %p4, %rd12, %rd5;
869+
; CHECK-NEXT: setp.lt.s64 %p4, %rd2, %rd5;
868870
; CHECK-NEXT: or.pred %p5, %p3, %p4;
869-
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
870-
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
871+
; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
872+
; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
871873
; CHECK-NEXT: {
872874
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
873-
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
875+
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
874876
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
875877
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
876-
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
878+
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
877879
; CHECK-NEXT: }
878-
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
879-
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
880+
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
881+
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
880882
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
881883
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
882-
; CHECK-NEXT: mov.b64 %rd11, %rd1;
883-
; CHECK-NEXT: mov.b64 %rd12, %rd2;
884884
; CHECK-NEXT: @%p6 bra $L__BB37_1;
885885
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
886-
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
886+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
887887
; CHECK-NEXT: ret;
888888
%ret = atomicrmw min ptr %ptr, i128 %val monotonic
889889
ret i128 %ret
@@ -901,29 +901,29 @@ define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) {
901901
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
902902
; CHECK-NEXT: $L__BB38_1: // %atomicrmw.start
903903
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
904-
; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
905-
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
904+
; CHECK-NEXT: mov.b64 %rd2, %rd12;
905+
; CHECK-NEXT: mov.b64 %rd1, %rd11;
906+
; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
907+
; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
906908
; CHECK-NEXT: and.pred %p3, %p2, %p1;
907-
; CHECK-NEXT: setp.gt.s64 %p4, %rd12, %rd5;
909+
; CHECK-NEXT: setp.gt.s64 %p4, %rd2, %rd5;
908910
; CHECK-NEXT: or.pred %p5, %p3, %p4;
909-
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
910-
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
911+
; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
912+
; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
911913
; CHECK-NEXT: {
912914
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
913-
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
915+
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
914916
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
915917
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
916-
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
918+
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
917919
; CHECK-NEXT: }
918-
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
919-
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
920+
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
921+
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
920922
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
921923
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
922-
; CHECK-NEXT: mov.b64 %rd11, %rd1;
923-
; CHECK-NEXT: mov.b64 %rd12, %rd2;
924924
; CHECK-NEXT: @%p6 bra $L__BB38_1;
925925
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
926-
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
926+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
927927
; CHECK-NEXT: ret;
928928
%ret = atomicrmw max ptr %ptr, i128 %val monotonic
929929
ret i128 %ret
@@ -941,29 +941,29 @@ define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) {
941941
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
942942
; CHECK-NEXT: $L__BB39_1: // %atomicrmw.start
943943
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
944-
; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
945-
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
944+
; CHECK-NEXT: mov.b64 %rd2, %rd12;
945+
; CHECK-NEXT: mov.b64 %rd1, %rd11;
946+
; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
947+
; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
946948
; CHECK-NEXT: and.pred %p3, %p2, %p1;
947-
; CHECK-NEXT: setp.lt.u64 %p4, %rd12, %rd5;
949+
; CHECK-NEXT: setp.lt.u64 %p4, %rd2, %rd5;
948950
; CHECK-NEXT: or.pred %p5, %p3, %p4;
949-
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
950-
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
951+
; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
952+
; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
951953
; CHECK-NEXT: {
952954
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
953-
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
955+
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
954956
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
955957
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
956-
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
958+
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
957959
; CHECK-NEXT: }
958-
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
959-
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
960+
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
961+
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
960962
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
961963
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
962-
; CHECK-NEXT: mov.b64 %rd11, %rd1;
963-
; CHECK-NEXT: mov.b64 %rd12, %rd2;
964964
; CHECK-NEXT: @%p6 bra $L__BB39_1;
965965
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
966-
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
966+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
967967
; CHECK-NEXT: ret;
968968
%ret = atomicrmw umin ptr %ptr, i128 %val monotonic
969969
ret i128 %ret
@@ -981,29 +981,29 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
981981
; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
982982
; CHECK-NEXT: $L__BB40_1: // %atomicrmw.start
983983
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
984-
; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
985-
; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
984+
; CHECK-NEXT: mov.b64 %rd2, %rd12;
985+
; CHECK-NEXT: mov.b64 %rd1, %rd11;
986+
; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
987+
; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
986988
; CHECK-NEXT: and.pred %p3, %p2, %p1;
987-
; CHECK-NEXT: setp.gt.u64 %p4, %rd12, %rd5;
989+
; CHECK-NEXT: setp.gt.u64 %p4, %rd2, %rd5;
988990
; CHECK-NEXT: or.pred %p5, %p3, %p4;
989-
; CHECK-NEXT: selp.b64 %rd6, %rd12, %rd5, %p5;
990-
; CHECK-NEXT: selp.b64 %rd7, %rd11, %rd4, %p5;
991+
; CHECK-NEXT: selp.b64 %rd6, %rd2, %rd5, %p5;
992+
; CHECK-NEXT: selp.b64 %rd7, %rd1, %rd4, %p5;
991993
; CHECK-NEXT: {
992994
; CHECK-NEXT: .reg .b128 cmp, swap, dst;
993-
; CHECK-NEXT: mov.b128 cmp, {%rd11, %rd12};
995+
; CHECK-NEXT: mov.b128 cmp, {%rd1, %rd2};
994996
; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
995997
; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
996-
; CHECK-NEXT: mov.b128 {%rd1, %rd2}, dst;
998+
; CHECK-NEXT: mov.b128 {%rd11, %rd12}, dst;
997999
; CHECK-NEXT: }
998-
; CHECK-NEXT: xor.b64 %rd8, %rd2, %rd12;
999-
; CHECK-NEXT: xor.b64 %rd9, %rd1, %rd11;
1000+
; CHECK-NEXT: xor.b64 %rd8, %rd12, %rd2;
1001+
; CHECK-NEXT: xor.b64 %rd9, %rd11, %rd1;
10001002
; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
10011003
; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
1002-
; CHECK-NEXT: mov.b64 %rd11, %rd1;
1003-
; CHECK-NEXT: mov.b64 %rd12, %rd2;
10041004
; CHECK-NEXT: @%p6 bra $L__BB40_1;
10051005
; CHECK-NEXT: // %bb.2: // %atomicrmw.end
1006-
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1, %rd2};
1006+
; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11, %rd12};
10071007
; CHECK-NEXT: ret;
10081008
%ret = atomicrmw umax ptr %ptr, i128 %val monotonic
10091009
ret i128 %ret

0 commit comments

Comments
 (0)