@@ -756,24 +756,24 @@ define i128 @test_atomicrmw_and(ptr %ptr, i128 %val) {
756756; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
757757; CHECK-NEXT: $L__BB34_1: // %atomicrmw.start
758758; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
759- ; CHECK-NEXT: and.b64 %rd6, %rd11, %rd4;
760- ; CHECK-NEXT: and.b64 %rd7, %rd12, %rd5;
759+ ; CHECK-NEXT: mov.b64 %rd2, %rd12;
760+ ; CHECK-NEXT: mov.b64 %rd1, %rd11;
761+ ; CHECK-NEXT: and.b64 %rd6, %rd1, %rd4;
762+ ; CHECK-NEXT: and.b64 %rd7, %rd2, %rd5;
761763; CHECK-NEXT: {
762764; CHECK-NEXT: .reg .b128 cmp, swap, dst;
763- ; CHECK-NEXT: mov.b128 cmp, {%rd11 , %rd12 };
765+ ; CHECK-NEXT: mov.b128 cmp, {%rd1 , %rd2 };
764766; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
765767; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
766- ; CHECK-NEXT: mov.b128 {%rd1 , %rd2 }, dst;
768+ ; CHECK-NEXT: mov.b128 {%rd11 , %rd12 }, dst;
767769; CHECK-NEXT: }
768- ; CHECK-NEXT: xor.b64 %rd8, %rd2 , %rd12 ;
769- ; CHECK-NEXT: xor.b64 %rd9, %rd1 , %rd11 ;
770+ ; CHECK-NEXT: xor.b64 %rd8, %rd12 , %rd2 ;
771+ ; CHECK-NEXT: xor.b64 %rd9, %rd11 , %rd1 ;
770772; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
771773; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
772- ; CHECK-NEXT: mov.b64 %rd11, %rd1;
773- ; CHECK-NEXT: mov.b64 %rd12, %rd2;
774774; CHECK-NEXT: @%p1 bra $L__BB34_1;
775775; CHECK-NEXT: // %bb.2: // %atomicrmw.end
776- ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1 , %rd2 };
776+ ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11 , %rd12 };
777777; CHECK-NEXT: ret;
778778 %ret = atomicrmw and ptr %ptr , i128 %val monotonic
779779 ret i128 %ret
@@ -791,24 +791,24 @@ define i128 @test_atomicrmw_or(ptr %ptr, i128 %val) {
791791; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
792792; CHECK-NEXT: $L__BB35_1: // %atomicrmw.start
793793; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
794- ; CHECK-NEXT: or.b64 %rd6, %rd11, %rd4;
795- ; CHECK-NEXT: or.b64 %rd7, %rd12, %rd5;
794+ ; CHECK-NEXT: mov.b64 %rd2, %rd12;
795+ ; CHECK-NEXT: mov.b64 %rd1, %rd11;
796+ ; CHECK-NEXT: or.b64 %rd6, %rd1, %rd4;
797+ ; CHECK-NEXT: or.b64 %rd7, %rd2, %rd5;
796798; CHECK-NEXT: {
797799; CHECK-NEXT: .reg .b128 cmp, swap, dst;
798- ; CHECK-NEXT: mov.b128 cmp, {%rd11 , %rd12 };
800+ ; CHECK-NEXT: mov.b128 cmp, {%rd1 , %rd2 };
799801; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
800802; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
801- ; CHECK-NEXT: mov.b128 {%rd1 , %rd2 }, dst;
803+ ; CHECK-NEXT: mov.b128 {%rd11 , %rd12 }, dst;
802804; CHECK-NEXT: }
803- ; CHECK-NEXT: xor.b64 %rd8, %rd2 , %rd12 ;
804- ; CHECK-NEXT: xor.b64 %rd9, %rd1 , %rd11 ;
805+ ; CHECK-NEXT: xor.b64 %rd8, %rd12 , %rd2 ;
806+ ; CHECK-NEXT: xor.b64 %rd9, %rd11 , %rd1 ;
805807; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
806808; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
807- ; CHECK-NEXT: mov.b64 %rd11, %rd1;
808- ; CHECK-NEXT: mov.b64 %rd12, %rd2;
809809; CHECK-NEXT: @%p1 bra $L__BB35_1;
810810; CHECK-NEXT: // %bb.2: // %atomicrmw.end
811- ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1 , %rd2 };
811+ ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11 , %rd12 };
812812; CHECK-NEXT: ret;
813813 %ret = atomicrmw or ptr %ptr , i128 %val monotonic
814814 ret i128 %ret
@@ -826,24 +826,24 @@ define i128 @test_atomicrmw_xor(ptr %ptr, i128 %val) {
826826; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
827827; CHECK-NEXT: $L__BB36_1: // %atomicrmw.start
828828; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
829- ; CHECK-NEXT: xor.b64 %rd6, %rd11, %rd4;
830- ; CHECK-NEXT: xor.b64 %rd7, %rd12, %rd5;
829+ ; CHECK-NEXT: mov.b64 %rd2, %rd12;
830+ ; CHECK-NEXT: mov.b64 %rd1, %rd11;
831+ ; CHECK-NEXT: xor.b64 %rd6, %rd1, %rd4;
832+ ; CHECK-NEXT: xor.b64 %rd7, %rd2, %rd5;
831833; CHECK-NEXT: {
832834; CHECK-NEXT: .reg .b128 cmp, swap, dst;
833- ; CHECK-NEXT: mov.b128 cmp, {%rd11 , %rd12 };
835+ ; CHECK-NEXT: mov.b128 cmp, {%rd1 , %rd2 };
834836; CHECK-NEXT: mov.b128 swap, {%rd6, %rd7};
835837; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
836- ; CHECK-NEXT: mov.b128 {%rd1 , %rd2 }, dst;
838+ ; CHECK-NEXT: mov.b128 {%rd11 , %rd12 }, dst;
837839; CHECK-NEXT: }
838- ; CHECK-NEXT: xor.b64 %rd8, %rd2 , %rd12 ;
839- ; CHECK-NEXT: xor.b64 %rd9, %rd1 , %rd11 ;
840+ ; CHECK-NEXT: xor.b64 %rd8, %rd12 , %rd2 ;
841+ ; CHECK-NEXT: xor.b64 %rd9, %rd11 , %rd1 ;
840842; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
841843; CHECK-NEXT: setp.ne.b64 %p1, %rd10, 0;
842- ; CHECK-NEXT: mov.b64 %rd11, %rd1;
843- ; CHECK-NEXT: mov.b64 %rd12, %rd2;
844844; CHECK-NEXT: @%p1 bra $L__BB36_1;
845845; CHECK-NEXT: // %bb.2: // %atomicrmw.end
846- ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1 , %rd2 };
846+ ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11 , %rd12 };
847847; CHECK-NEXT: ret;
848848 %ret = atomicrmw xor ptr %ptr , i128 %val monotonic
849849 ret i128 %ret
@@ -861,29 +861,29 @@ define i128 @test_atomicrmw_min(ptr %ptr, i128 %val) {
861861; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
862862; CHECK-NEXT: $L__BB37_1: // %atomicrmw.start
863863; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
864- ; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
865- ; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
864+ ; CHECK-NEXT: mov.b64 %rd2, %rd12;
865+ ; CHECK-NEXT: mov.b64 %rd1, %rd11;
866+ ; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
867+ ; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
866868; CHECK-NEXT: and.pred %p3, %p2, %p1;
867- ; CHECK-NEXT: setp.lt.s64 %p4, %rd12 , %rd5;
869+ ; CHECK-NEXT: setp.lt.s64 %p4, %rd2 , %rd5;
868870; CHECK-NEXT: or.pred %p5, %p3, %p4;
869- ; CHECK-NEXT: selp.b64 %rd6, %rd12 , %rd5, %p5;
870- ; CHECK-NEXT: selp.b64 %rd7, %rd11 , %rd4, %p5;
871+ ; CHECK-NEXT: selp.b64 %rd6, %rd2 , %rd5, %p5;
872+ ; CHECK-NEXT: selp.b64 %rd7, %rd1 , %rd4, %p5;
871873; CHECK-NEXT: {
872874; CHECK-NEXT: .reg .b128 cmp, swap, dst;
873- ; CHECK-NEXT: mov.b128 cmp, {%rd11 , %rd12 };
875+ ; CHECK-NEXT: mov.b128 cmp, {%rd1 , %rd2 };
874876; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
875877; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
876- ; CHECK-NEXT: mov.b128 {%rd1 , %rd2 }, dst;
878+ ; CHECK-NEXT: mov.b128 {%rd11 , %rd12 }, dst;
877879; CHECK-NEXT: }
878- ; CHECK-NEXT: xor.b64 %rd8, %rd2 , %rd12 ;
879- ; CHECK-NEXT: xor.b64 %rd9, %rd1 , %rd11 ;
880+ ; CHECK-NEXT: xor.b64 %rd8, %rd12 , %rd2 ;
881+ ; CHECK-NEXT: xor.b64 %rd9, %rd11 , %rd1 ;
880882; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
881883; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
882- ; CHECK-NEXT: mov.b64 %rd11, %rd1;
883- ; CHECK-NEXT: mov.b64 %rd12, %rd2;
884884; CHECK-NEXT: @%p6 bra $L__BB37_1;
885885; CHECK-NEXT: // %bb.2: // %atomicrmw.end
886- ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1 , %rd2 };
886+ ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11 , %rd12 };
887887; CHECK-NEXT: ret;
888888 %ret = atomicrmw min ptr %ptr , i128 %val monotonic
889889 ret i128 %ret
@@ -901,29 +901,29 @@ define i128 @test_atomicrmw_max(ptr %ptr, i128 %val) {
901901; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
902902; CHECK-NEXT: $L__BB38_1: // %atomicrmw.start
903903; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
904- ; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
905- ; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
904+ ; CHECK-NEXT: mov.b64 %rd2, %rd12;
905+ ; CHECK-NEXT: mov.b64 %rd1, %rd11;
906+ ; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
907+ ; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
906908; CHECK-NEXT: and.pred %p3, %p2, %p1;
907- ; CHECK-NEXT: setp.gt.s64 %p4, %rd12 , %rd5;
909+ ; CHECK-NEXT: setp.gt.s64 %p4, %rd2 , %rd5;
908910; CHECK-NEXT: or.pred %p5, %p3, %p4;
909- ; CHECK-NEXT: selp.b64 %rd6, %rd12 , %rd5, %p5;
910- ; CHECK-NEXT: selp.b64 %rd7, %rd11 , %rd4, %p5;
911+ ; CHECK-NEXT: selp.b64 %rd6, %rd2 , %rd5, %p5;
912+ ; CHECK-NEXT: selp.b64 %rd7, %rd1 , %rd4, %p5;
911913; CHECK-NEXT: {
912914; CHECK-NEXT: .reg .b128 cmp, swap, dst;
913- ; CHECK-NEXT: mov.b128 cmp, {%rd11 , %rd12 };
915+ ; CHECK-NEXT: mov.b128 cmp, {%rd1 , %rd2 };
914916; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
915917; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
916- ; CHECK-NEXT: mov.b128 {%rd1 , %rd2 }, dst;
918+ ; CHECK-NEXT: mov.b128 {%rd11 , %rd12 }, dst;
917919; CHECK-NEXT: }
918- ; CHECK-NEXT: xor.b64 %rd8, %rd2 , %rd12 ;
919- ; CHECK-NEXT: xor.b64 %rd9, %rd1 , %rd11 ;
920+ ; CHECK-NEXT: xor.b64 %rd8, %rd12 , %rd2 ;
921+ ; CHECK-NEXT: xor.b64 %rd9, %rd11 , %rd1 ;
920922; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
921923; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
922- ; CHECK-NEXT: mov.b64 %rd11, %rd1;
923- ; CHECK-NEXT: mov.b64 %rd12, %rd2;
924924; CHECK-NEXT: @%p6 bra $L__BB38_1;
925925; CHECK-NEXT: // %bb.2: // %atomicrmw.end
926- ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1 , %rd2 };
926+ ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11 , %rd12 };
927927; CHECK-NEXT: ret;
928928 %ret = atomicrmw max ptr %ptr , i128 %val monotonic
929929 ret i128 %ret
@@ -941,29 +941,29 @@ define i128 @test_atomicrmw_umin(ptr %ptr, i128 %val) {
941941; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
942942; CHECK-NEXT: $L__BB39_1: // %atomicrmw.start
943943; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
944- ; CHECK-NEXT: setp.lt.u64 %p1, %rd11, %rd4;
945- ; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
944+ ; CHECK-NEXT: mov.b64 %rd2, %rd12;
945+ ; CHECK-NEXT: mov.b64 %rd1, %rd11;
946+ ; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd4;
947+ ; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
946948; CHECK-NEXT: and.pred %p3, %p2, %p1;
947- ; CHECK-NEXT: setp.lt.u64 %p4, %rd12 , %rd5;
949+ ; CHECK-NEXT: setp.lt.u64 %p4, %rd2 , %rd5;
948950; CHECK-NEXT: or.pred %p5, %p3, %p4;
949- ; CHECK-NEXT: selp.b64 %rd6, %rd12 , %rd5, %p5;
950- ; CHECK-NEXT: selp.b64 %rd7, %rd11 , %rd4, %p5;
951+ ; CHECK-NEXT: selp.b64 %rd6, %rd2 , %rd5, %p5;
952+ ; CHECK-NEXT: selp.b64 %rd7, %rd1 , %rd4, %p5;
951953; CHECK-NEXT: {
952954; CHECK-NEXT: .reg .b128 cmp, swap, dst;
953- ; CHECK-NEXT: mov.b128 cmp, {%rd11 , %rd12 };
955+ ; CHECK-NEXT: mov.b128 cmp, {%rd1 , %rd2 };
954956; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
955957; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
956- ; CHECK-NEXT: mov.b128 {%rd1 , %rd2 }, dst;
958+ ; CHECK-NEXT: mov.b128 {%rd11 , %rd12 }, dst;
957959; CHECK-NEXT: }
958- ; CHECK-NEXT: xor.b64 %rd8, %rd2 , %rd12 ;
959- ; CHECK-NEXT: xor.b64 %rd9, %rd1 , %rd11 ;
960+ ; CHECK-NEXT: xor.b64 %rd8, %rd12 , %rd2 ;
961+ ; CHECK-NEXT: xor.b64 %rd9, %rd11 , %rd1 ;
960962; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
961963; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
962- ; CHECK-NEXT: mov.b64 %rd11, %rd1;
963- ; CHECK-NEXT: mov.b64 %rd12, %rd2;
964964; CHECK-NEXT: @%p6 bra $L__BB39_1;
965965; CHECK-NEXT: // %bb.2: // %atomicrmw.end
966- ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1 , %rd2 };
966+ ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11 , %rd12 };
967967; CHECK-NEXT: ret;
968968 %ret = atomicrmw umin ptr %ptr , i128 %val monotonic
969969 ret i128 %ret
@@ -981,29 +981,29 @@ define i128 @test_atomicrmw_umax(ptr %ptr, i128 %val) {
981981; CHECK-NEXT: ld.v2.b64 {%rd11, %rd12}, [%rd3];
982982; CHECK-NEXT: $L__BB40_1: // %atomicrmw.start
983983; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
984- ; CHECK-NEXT: setp.gt.u64 %p1, %rd11, %rd4;
985- ; CHECK-NEXT: setp.eq.b64 %p2, %rd12, %rd5;
984+ ; CHECK-NEXT: mov.b64 %rd2, %rd12;
985+ ; CHECK-NEXT: mov.b64 %rd1, %rd11;
986+ ; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd4;
987+ ; CHECK-NEXT: setp.eq.b64 %p2, %rd2, %rd5;
986988; CHECK-NEXT: and.pred %p3, %p2, %p1;
987- ; CHECK-NEXT: setp.gt.u64 %p4, %rd12 , %rd5;
989+ ; CHECK-NEXT: setp.gt.u64 %p4, %rd2 , %rd5;
988990; CHECK-NEXT: or.pred %p5, %p3, %p4;
989- ; CHECK-NEXT: selp.b64 %rd6, %rd12 , %rd5, %p5;
990- ; CHECK-NEXT: selp.b64 %rd7, %rd11 , %rd4, %p5;
991+ ; CHECK-NEXT: selp.b64 %rd6, %rd2 , %rd5, %p5;
992+ ; CHECK-NEXT: selp.b64 %rd7, %rd1 , %rd4, %p5;
991993; CHECK-NEXT: {
992994; CHECK-NEXT: .reg .b128 cmp, swap, dst;
993- ; CHECK-NEXT: mov.b128 cmp, {%rd11 , %rd12 };
995+ ; CHECK-NEXT: mov.b128 cmp, {%rd1 , %rd2 };
994996; CHECK-NEXT: mov.b128 swap, {%rd7, %rd6};
995997; CHECK-NEXT: atom.relaxed.sys.cas.b128 dst, [%rd3], cmp, swap;
996- ; CHECK-NEXT: mov.b128 {%rd1 , %rd2 }, dst;
998+ ; CHECK-NEXT: mov.b128 {%rd11 , %rd12 }, dst;
997999; CHECK-NEXT: }
998- ; CHECK-NEXT: xor.b64 %rd8, %rd2 , %rd12 ;
999- ; CHECK-NEXT: xor.b64 %rd9, %rd1 , %rd11 ;
1000+ ; CHECK-NEXT: xor.b64 %rd8, %rd12 , %rd2 ;
1001+ ; CHECK-NEXT: xor.b64 %rd9, %rd11 , %rd1 ;
10001002; CHECK-NEXT: or.b64 %rd10, %rd9, %rd8;
10011003; CHECK-NEXT: setp.ne.b64 %p6, %rd10, 0;
1002- ; CHECK-NEXT: mov.b64 %rd11, %rd1;
1003- ; CHECK-NEXT: mov.b64 %rd12, %rd2;
10041004; CHECK-NEXT: @%p6 bra $L__BB40_1;
10051005; CHECK-NEXT: // %bb.2: // %atomicrmw.end
1006- ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd1 , %rd2 };
1006+ ; CHECK-NEXT: st.param.v2.b64 [func_retval0], {%rd11 , %rd12 };
10071007; CHECK-NEXT: ret;
10081008 %ret = atomicrmw umax ptr %ptr , i128 %val monotonic
10091009 ret i128 %ret
0 commit comments