@@ -969,37 +969,38 @@ define void @flat_atomic_xchg_i64_ret_av_av(ptr %ptr) #0 {
969969; GFX950: ; %bb.0:
970970; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
971971; GFX950-NEXT: s_mov_b64 s[0:1], 0x50
972- ; GFX950-NEXT: v_lshl_add_u64 v[4:5 ], v[0:1], 0, s[0:1]
972+ ; GFX950-NEXT: v_lshl_add_u64 v[2:3 ], v[0:1], 0, s[0:1]
973973; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base
974- ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
974+ ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3
975975; GFX950-NEXT: ;;#ASMSTART
976- ; GFX950-NEXT: ; def v[0:1 ]
976+ ; GFX950-NEXT: ; def v[4:5 ]
977977; GFX950-NEXT: ;;#ASMEND
978- ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
978+ ; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1
979979; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc
980980; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
981981; GFX950-NEXT: s_cbranch_execz .LBB14_2
982982; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
983983; GFX950-NEXT: buffer_wbl2 sc0 sc1
984- ; GFX950-NEXT: flat_atomic_swap_x2 v[2:3 ], v[4:5 ], v[0:1 ] sc0 sc1
984+ ; GFX950-NEXT: flat_atomic_swap_x2 v[0:1 ], v[2:3 ], v[4:5 ] sc0 sc1
985985; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
986986; GFX950-NEXT: buffer_inv sc0 sc1
987+ ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
987988; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5
988989; GFX950-NEXT: .LBB14_2: ; %Flow
989990; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
990991; GFX950-NEXT: s_cbranch_execz .LBB14_4
991992; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private
992- ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5 ]
993+ ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3 ]
993994; GFX950-NEXT: s_nop 1
994- ; GFX950-NEXT: v_cndmask_b32_e32 v4 , -1, v4 , vcc
995- ; GFX950-NEXT: scratch_load_dwordx2 v[2:3 ], v4 , off
995+ ; GFX950-NEXT: v_cndmask_b32_e32 v2 , -1, v2 , vcc
996+ ; GFX950-NEXT: scratch_load_dwordx2 v[0:1 ], v2 , off
996997; GFX950-NEXT: s_nop 0
997- ; GFX950-NEXT: scratch_store_dwordx2 v4 , v[0:1 ], off
998+ ; GFX950-NEXT: scratch_store_dwordx2 v2 , v[4:5 ], off
998999; GFX950-NEXT: .LBB14_4: ; %atomicrmw.phi
9991000; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
10001001; GFX950-NEXT: s_waitcnt vmcnt(1)
10011002; GFX950-NEXT: ;;#ASMSTART
1002- ; GFX950-NEXT: ; use v[2:3 ]
1003+ ; GFX950-NEXT: ; use v[0:1 ]
10031004; GFX950-NEXT: ;;#ASMEND
10041005; GFX950-NEXT: s_waitcnt vmcnt(0)
10051006; GFX950-NEXT: s_setpc_b64 s[30:31]
@@ -1058,37 +1059,38 @@ define void @flat_atomic_xchg_i64_ret_av_v(ptr %ptr) #0 {
10581059; GFX950: ; %bb.0:
10591060; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10601061; GFX950-NEXT: s_mov_b64 s[0:1], 0x50
1061- ; GFX950-NEXT: v_lshl_add_u64 v[4:5 ], v[0:1], 0, s[0:1]
1062+ ; GFX950-NEXT: v_lshl_add_u64 v[2:3 ], v[0:1], 0, s[0:1]
10621063; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base
1063- ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v5
1064+ ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3
10641065; GFX950-NEXT: ;;#ASMSTART
1065- ; GFX950-NEXT: ; def v[0:1 ]
1066+ ; GFX950-NEXT: ; def v[4:5 ]
10661067; GFX950-NEXT: ;;#ASMEND
1067- ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
1068+ ; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1
10681069; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc
10691070; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
10701071; GFX950-NEXT: s_cbranch_execz .LBB15_2
10711072; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
10721073; GFX950-NEXT: buffer_wbl2 sc0 sc1
1073- ; GFX950-NEXT: flat_atomic_swap_x2 v[2:3 ], v[4:5 ], v[0:1 ] sc0 sc1
1074+ ; GFX950-NEXT: flat_atomic_swap_x2 v[0:1 ], v[2:3 ], v[4:5 ] sc0 sc1
10741075; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
10751076; GFX950-NEXT: buffer_inv sc0 sc1
1077+ ; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
10761078; GFX950-NEXT: ; implicit-def: $vgpr4_vgpr5
10771079; GFX950-NEXT: .LBB15_2: ; %Flow
10781080; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
10791081; GFX950-NEXT: s_cbranch_execz .LBB15_4
10801082; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private
1081- ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5 ]
1083+ ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3 ]
10821084; GFX950-NEXT: s_nop 1
1083- ; GFX950-NEXT: v_cndmask_b32_e32 v4 , -1, v4 , vcc
1084- ; GFX950-NEXT: scratch_load_dwordx2 v[2:3 ], v4 , off
1085+ ; GFX950-NEXT: v_cndmask_b32_e32 v2 , -1, v2 , vcc
1086+ ; GFX950-NEXT: scratch_load_dwordx2 v[0:1 ], v2 , off
10851087; GFX950-NEXT: s_nop 0
1086- ; GFX950-NEXT: scratch_store_dwordx2 v4 , v[0:1 ], off
1088+ ; GFX950-NEXT: scratch_store_dwordx2 v2 , v[4:5 ], off
10871089; GFX950-NEXT: .LBB15_4: ; %atomicrmw.phi
10881090; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
10891091; GFX950-NEXT: s_waitcnt vmcnt(1)
10901092; GFX950-NEXT: ;;#ASMSTART
1091- ; GFX950-NEXT: ; use v[2:3 ]
1093+ ; GFX950-NEXT: ; use v[0:1 ]
10921094; GFX950-NEXT: ;;#ASMEND
10931095; GFX950-NEXT: s_waitcnt vmcnt(0)
10941096; GFX950-NEXT: s_setpc_b64 s[30:31]
@@ -1149,34 +1151,35 @@ define void @flat_atomic_xchg_i64_ret_av_a(ptr %ptr) #0 {
11491151; GFX950: ; %bb.0:
11501152; GFX950-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
11511153; GFX950-NEXT: s_mov_b64 s[0:1], 0x50
1152- ; GFX950-NEXT: v_lshl_add_u64 v[2:3 ], v[0:1], 0, s[0:1]
1154+ ; GFX950-NEXT: v_lshl_add_u64 v[0:1 ], v[0:1], 0, s[0:1]
11531155; GFX950-NEXT: s_mov_b64 s[0:1], src_private_base
1154- ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v3
1156+ ; GFX950-NEXT: v_cmp_ne_u32_e32 vcc, s1, v1
11551157; GFX950-NEXT: ;;#ASMSTART
1156- ; GFX950-NEXT: ; def v[0:1 ]
1158+ ; GFX950-NEXT: ; def v[2:3 ]
11571159; GFX950-NEXT: ;;#ASMEND
11581160; GFX950-NEXT: ; implicit-def: $agpr0_agpr1
11591161; GFX950-NEXT: s_and_saveexec_b64 s[0:1], vcc
11601162; GFX950-NEXT: s_xor_b64 s[0:1], exec, s[0:1]
11611163; GFX950-NEXT: s_cbranch_execz .LBB16_2
11621164; GFX950-NEXT: ; %bb.1: ; %atomicrmw.global
11631165; GFX950-NEXT: buffer_wbl2 sc0 sc1
1164- ; GFX950-NEXT: flat_atomic_swap_x2 v[2:3 ], v[2:3 ], v[0:1 ] sc0 sc1
1166+ ; GFX950-NEXT: flat_atomic_swap_x2 v[0:1 ], v[0:1 ], v[2:3 ] sc0 sc1
11651167; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
11661168; GFX950-NEXT: buffer_inv sc0 sc1
1167- ; GFX950-NEXT: v_accvgpr_write_b32 a0, v2
1168- ; GFX950-NEXT: v_accvgpr_write_b32 a1, v3
11691169; GFX950-NEXT: ; implicit-def: $vgpr2_vgpr3
1170+ ; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
1171+ ; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
1172+ ; GFX950-NEXT: ; implicit-def: $vgpr0_vgpr1
11701173; GFX950-NEXT: .LBB16_2: ; %Flow
11711174; GFX950-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1]
11721175; GFX950-NEXT: s_cbranch_execz .LBB16_4
11731176; GFX950-NEXT: ; %bb.3: ; %atomicrmw.private
1174- ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[2:3 ]
1177+ ; GFX950-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1 ]
11751178; GFX950-NEXT: s_nop 1
1176- ; GFX950-NEXT: v_cndmask_b32_e32 v2 , -1, v2 , vcc
1177- ; GFX950-NEXT: scratch_load_dwordx2 a[0:1], v2 , off
1179+ ; GFX950-NEXT: v_cndmask_b32_e32 v0 , -1, v0 , vcc
1180+ ; GFX950-NEXT: scratch_load_dwordx2 a[0:1], v0 , off
11781181; GFX950-NEXT: s_nop 0
1179- ; GFX950-NEXT: scratch_store_dwordx2 v2 , v[0:1 ], off
1182+ ; GFX950-NEXT: scratch_store_dwordx2 v0 , v[2:3 ], off
11801183; GFX950-NEXT: .LBB16_4: ; %atomicrmw.phi
11811184; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
11821185; GFX950-NEXT: s_waitcnt vmcnt(1)
0 commit comments