@@ -969,37 +969,38 @@ define void @flat_atomic_xchg_i64_ret_av_av(ptr %ptr) #0 {
969
969
; GFX950: ; %bb.0:
970
970
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
971
971
; GFX950-NEXT: s_mov_b64 s[0:1], 0x50
972
- ; GFX950-NEXT: v_lshl_add_u64 v[4:5 ], v[0:1], 0, s[0:1]
972
+ ; GFX950-NEXT: v_lshl_add_u64 v[2:3 ], v[0:1], 0, s[0:1]
973
973
; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base
974
- ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
974
+ ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3
975
975
; GFX950-NEXT: ;;#ASMSTART
976
- ; GFX950-NEXT: ; def v[0:1 ]
976
+ ; GFX950-NEXT: ; def v[4:5 ]
977
977
; GFX950-NEXT: ;;#ASMEND
978
- ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
978
+ ; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1
979
979
; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc
980
980
; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
981
981
; GFX950-NEXT: s_cbranch_execz .LBB14_2
982
982
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
983
983
; GFX950-NEXT: buffer_wbl2 sc0 sc1
984
- ; GFX950-NEXT: flat_atomic_swap_x2 v[2:3 ], v[4:5 ], v[0:1 ] sc0 sc1
984
+ ; GFX950-NEXT: flat_atomic_swap_x2 v[0:1 ], v[2:3 ], v[4:5 ] sc0 sc1
985
985
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
986
986
; GFX950-NEXT: buffer_inv sc0 sc1
987
+ ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
987
988
; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5
988
989
; GFX950-NEXT: .LBB14_2: ; %Flow
989
990
; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
990
991
; GFX950-NEXT: s_cbranch_execz .LBB14_4
991
992
; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private
992
- ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5 ]
993
+ ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3 ]
993
994
; GFX950-NEXT: s_nop 1
994
- ; GFX950-NEXT: v_cndmask_b32_e32 v4 , -1, v4 , vcc
995
- ; GFX950-NEXT: scratch_load_dwordx2 v[2:3 ], v4 , off
995
+ ; GFX950-NEXT: v_cndmask_b32_e32 v2 , -1, v2 , vcc
996
+ ; GFX950-NEXT: scratch_load_dwordx2 v[0:1 ], v2 , off
996
997
; GFX950-NEXT: s_nop 0
997
- ; GFX950-NEXT: scratch_store_dwordx2 v4 , v[0:1 ], off
998
+ ; GFX950-NEXT: scratch_store_dwordx2 v2 , v[4:5 ], off
998
999
; GFX950-NEXT: .LBB14_4: ; %atomicrmw.phi
999
1000
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
1000
1001
; GFX950-NEXT: s_waitcnt vmcnt(1)
1001
1002
; GFX950-NEXT: ;;#ASMSTART
1002
- ; GFX950-NEXT: ; use v[2:3 ]
1003
+ ; GFX950-NEXT: ; use v[0:1 ]
1003
1004
; GFX950-NEXT: ;;#ASMEND
1004
1005
; GFX950-NEXT: s_waitcnt vmcnt(0)
1005
1006
; GFX950-NEXT: s_setpc_b64 s[30:31]
@@ -1058,37 +1059,38 @@ define void @flat_atomic_xchg_i64_ret_av_v(ptr %ptr) #0 {
1058
1059
; GFX950: ; %bb.0:
1059
1060
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1060
1061
; GFX950-NEXT: s_mov_b64 s[0:1], 0x50
1061
- ; GFX950-NEXT: v_lshl_add_u64 v[4:5 ], v[0:1], 0, s[0:1]
1062
+ ; GFX950-NEXT: v_lshl_add_u64 v[2:3 ], v[0:1], 0, s[0:1]
1062
1063
; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base
1063
- ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
1064
+ ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3
1064
1065
; GFX950-NEXT: ;;#ASMSTART
1065
- ; GFX950-NEXT: ; def v[0:1 ]
1066
+ ; GFX950-NEXT: ; def v[4:5 ]
1066
1067
; GFX950-NEXT: ;;#ASMEND
1067
- ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
1068
+ ; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1
1068
1069
; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc
1069
1070
; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
1070
1071
; GFX950-NEXT: s_cbranch_execz .LBB15_2
1071
1072
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
1072
1073
; GFX950-NEXT: buffer_wbl2 sc0 sc1
1073
- ; GFX950-NEXT: flat_atomic_swap_x2 v[2:3 ], v[4:5 ], v[0:1 ] sc0 sc1
1074
+ ; GFX950-NEXT: flat_atomic_swap_x2 v[0:1 ], v[2:3 ], v[4:5 ] sc0 sc1
1074
1075
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1075
1076
; GFX950-NEXT: buffer_inv sc0 sc1
1077
+ ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
1076
1078
; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5
1077
1079
; GFX950-NEXT: .LBB15_2: ; %Flow
1078
1080
; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
1079
1081
; GFX950-NEXT: s_cbranch_execz .LBB15_4
1080
1082
; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private
1081
- ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5 ]
1083
+ ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3 ]
1082
1084
; GFX950-NEXT: s_nop 1
1083
- ; GFX950-NEXT: v_cndmask_b32_e32 v4 , -1, v4 , vcc
1084
- ; GFX950-NEXT: scratch_load_dwordx2 v[2:3 ], v4 , off
1085
+ ; GFX950-NEXT: v_cndmask_b32_e32 v2 , -1, v2 , vcc
1086
+ ; GFX950-NEXT: scratch_load_dwordx2 v[0:1 ], v2 , off
1085
1087
; GFX950-NEXT: s_nop 0
1086
- ; GFX950-NEXT: scratch_store_dwordx2 v4 , v[0:1 ], off
1088
+ ; GFX950-NEXT: scratch_store_dwordx2 v2 , v[4:5 ], off
1087
1089
; GFX950-NEXT: .LBB15_4: ; %atomicrmw.phi
1088
1090
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
1089
1091
; GFX950-NEXT: s_waitcnt vmcnt(1)
1090
1092
; GFX950-NEXT: ;;#ASMSTART
1091
- ; GFX950-NEXT: ; use v[2:3 ]
1093
+ ; GFX950-NEXT: ; use v[0:1 ]
1092
1094
; GFX950-NEXT: ;;#ASMEND
1093
1095
; GFX950-NEXT: s_waitcnt vmcnt(0)
1094
1096
; GFX950-NEXT: s_setpc_b64 s[30:31]
@@ -1149,34 +1151,35 @@ define void @flat_atomic_xchg_i64_ret_av_a(ptr %ptr) #0 {
1149
1151
; GFX950: ; %bb.0:
1150
1152
; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1151
1153
; GFX950-NEXT: s_mov_b64 s[0:1], 0x50
1152
- ; GFX950-NEXT: v_lshl_add_u64 v[2:3 ], v[0:1], 0, s[0:1]
1154
+ ; GFX950-NEXT: v_lshl_add_u64 v[0:1 ], v[0:1], 0, s[0:1]
1153
1155
; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base
1154
- ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3
1156
+ ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
1155
1157
; GFX950-NEXT: ;;#ASMSTART
1156
- ; GFX950-NEXT: ; def v[0:1 ]
1158
+ ; GFX950-NEXT: ; def v[2:3 ]
1157
1159
; GFX950-NEXT: ;;#ASMEND
1158
1160
; GFX950-NEXT: ; implicit-def: $agpr0_agpr1
1159
1161
; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc
1160
1162
; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
1161
1163
; GFX950-NEXT: s_cbranch_execz .LBB16_2
1162
1164
; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
1163
1165
; GFX950-NEXT: buffer_wbl2 sc0 sc1
1164
- ; GFX950-NEXT: flat_atomic_swap_x2 v[2:3 ], v[2:3 ], v[0:1 ] sc0 sc1
1166
+ ; GFX950-NEXT: flat_atomic_swap_x2 v[0:1 ], v[0:1 ], v[2:3 ] sc0 sc1
1165
1167
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1166
1168
; GFX950-NEXT: buffer_inv sc0 sc1
1167
- ; GFX950-NEXT: v_accvgpr_write_b32 a0, v2
1168
- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v3
1169
1169
; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
1170
+ ; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
1171
+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
1172
+ ; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1
1170
1173
; GFX950-NEXT: .LBB16_2: ; %Flow
1171
1174
; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
1172
1175
; GFX950-NEXT: s_cbranch_execz .LBB16_4
1173
1176
; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private
1174
- ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3 ]
1177
+ ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1 ]
1175
1178
; GFX950-NEXT: s_nop 1
1176
- ; GFX950-NEXT: v_cndmask_b32_e32 v2 , -1, v2 , vcc
1177
- ; GFX950-NEXT: scratch_load_dwordx2 a[0:1], v2 , off
1179
+ ; GFX950-NEXT: v_cndmask_b32_e32 v0 , -1, v0 , vcc
1180
+ ; GFX950-NEXT: scratch_load_dwordx2 a[0:1], v0 , off
1178
1181
; GFX950-NEXT: s_nop 0
1179
- ; GFX950-NEXT: scratch_store_dwordx2 v2 , v[0:1 ], off
1182
+ ; GFX950-NEXT: scratch_store_dwordx2 v0 , v[2:3 ], off
1180
1183
; GFX950-NEXT: .LBB16_4: ; %atomicrmw.phi
1181
1184
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
1182
1185
; GFX950-NEXT: s_waitcnt vmcnt(1)
0 commit comments