Skip to content

Commit 5be1306

Browse files
committed
Update tests
Signed-off-by: John Lu <[email protected]>
1 parent 054964c commit 5be1306

File tree

13 files changed

+1501
-1484
lines changed

13 files changed

+1501
-1484
lines changed

llvm/lib/Target/AMDGPU/SIISelLowering.cpp

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6081,9 +6081,6 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
60816081
MachineOperand &Src0 = MI.getOperand(2);
60826082
MachineOperand &Src1 = MI.getOperand(3);
60836083
MachineOperand &Src2 = MI.getOperand(4);
6084-
6085-
bool IsAdd = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO);
6086-
60876084
if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) {
60886085
Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
60896086
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
@@ -6103,20 +6100,13 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
61036100
Src2.setReg(RegOp2);
61046101
}
61056102

6106-
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
6107-
unsigned WaveSize = TRI->getRegSizeInBits(*Src2RC);
6108-
assert(WaveSize == 64 || WaveSize == 32);
6109-
6110-
unsigned SelectOpc =
6111-
(WaveSize == 64) ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
6112-
unsigned AddcSubbOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
6113-
6114-
if (WaveSize == 64) {
6103+
if (ST.isWave64()) {
61156104
if (ST.hasScalarCompareEq64()) {
61166105
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U64))
61176106
.addReg(Src2.getReg())
61186107
.addImm(0);
61196108
} else {
6109+
const TargetRegisterClass *Src2RC = MRI.getRegClass(Src2.getReg());
61206110
const TargetRegisterClass *SubRC =
61216111
TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
61226112
MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
@@ -6133,17 +6123,22 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
61336123
.addReg(Src2_32, RegState::Kill)
61346124
.addImm(0);
61356125
}
6136-
} else {
6137-
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6138-
.addReg(Src2.getReg())
6139-
.addImm(0);
6140-
}
6126+
} else {
6127+
BuildMI(*BB, MII, DL, TII->get(AMDGPU::S_CMP_LG_U32))
6128+
.addReg(Src2.getReg())
6129+
.addImm(0);
6130+
}
61416131

6142-
BuildMI(*BB, MII, DL, TII->get(AddcSubbOpc), Dest.getReg())
6143-
.add(Src0)
6144-
.add(Src1);
6132+
unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
6133+
? AMDGPU::S_ADDC_U32
6134+
: AMDGPU::S_SUBB_U32;
6135+
6136+
BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);
6137+
6138+
unsigned SelOpc =
6139+
ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
61456140

6146-
BuildMI(*BB, MII, DL, TII->get(SelectOpc), CarryDest.getReg())
6141+
BuildMI(*BB, MII, DL, TII->get(SelOpc), CarryDest.getReg())
61476142
.addImm(-1)
61486143
.addImm(0);
61496144

llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll

Lines changed: 42 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -8760,9 +8760,8 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87608760
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87618761
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
87628762
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8763-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
8764-
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
87658763
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8764+
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
87668765
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
87678766
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
87688767
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8781,20 +8780,19 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
87818780
; GFX90A-NEXT: s_cbranch_execz .LBB113_6
87828781
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
87838782
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
8784-
; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc
8785-
; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
8786-
; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
8783+
; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v4, vcc
8784+
; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
8785+
; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4
87878786
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8788-
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
8787+
; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v6
87898788
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8790-
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8791-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
8792-
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
8793-
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
8794-
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
8795-
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8796-
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen offset:4
8797-
; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
8789+
; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v7, vcc
8790+
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
8791+
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
8792+
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
8793+
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
8794+
; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
8795+
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
87988796
; GFX90A-NEXT: .LBB113_6: ; %atomicrmw.phi
87998797
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
88008798
; GFX90A-NEXT: ;;#ASMSTART
@@ -8828,10 +8826,9 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88288826
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
88298827
; GFX950-NEXT: s_nop 1
88308828
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
8831-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
88328829
; GFX950-NEXT: s_nop 1
8833-
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
88348830
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8831+
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
88358832
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0
88368833
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
88378834
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -8857,11 +8854,11 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
88578854
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
88588855
; GFX950-NEXT: s_nop 1
88598856
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
8860-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
88618857
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
8862-
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
8858+
; GFX950-NEXT: s_nop 0
88638859
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
88648860
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8861+
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
88658862
; GFX950-NEXT: scratch_store_dwordx2 v4, v[2:3], off
88668863
; GFX950-NEXT: .LBB113_6: ; %atomicrmw.phi
88678864
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
@@ -8901,9 +8898,8 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89018898
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
89028899
; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
89038900
; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
8904-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
8905-
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
89068901
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
8902+
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
89078903
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
89088904
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89098905
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
@@ -8919,18 +8915,17 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89198915
; GFX90A-NEXT: s_cbranch_execz .LBB114_6
89208916
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
89218917
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
8922-
; GFX90A-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc
8923-
; GFX90A-NEXT: buffer_load_dword v4, v6, s[0:3], 0 offen
8924-
; GFX90A-NEXT: buffer_load_dword v5, v6, s[0:3], 0 offen offset:4
8918+
; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
8919+
; GFX90A-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
8920+
; GFX90A-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:4
89258921
; GFX90A-NEXT: s_waitcnt vmcnt(1)
8926-
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v4, v2
8922+
; GFX90A-NEXT: v_sub_co_u32_e32 v1, vcc, v4, v2
89278923
; GFX90A-NEXT: s_waitcnt vmcnt(0)
8928-
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v3, vcc
8929-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[4:5]
8930-
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
8924+
; GFX90A-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v3, vcc
89318925
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
8932-
; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
8933-
; GFX90A-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
8926+
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
8927+
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
8928+
; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
89348929
; GFX90A-NEXT: .LBB114_6: ; %atomicrmw.phi
89358930
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
89368931
; GFX90A-NEXT: ;;#ASMSTART
@@ -8963,10 +8958,9 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89638958
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
89648959
; GFX950-NEXT: s_nop 1
89658960
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
8966-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
89678961
; GFX950-NEXT: s_nop 1
8968-
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
89698962
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
8963+
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
89708964
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
89718965
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89728966
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -8989,7 +8983,6 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
89898983
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
89908984
; GFX950-NEXT: s_nop 1
89918985
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
8992-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
89938986
; GFX950-NEXT: s_nop 1
89948987
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
89958988
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
@@ -17065,9 +17058,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1706517058
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1706617059
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1706717060
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17068-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
17069-
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1707017061
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17062+
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1707117063
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
1707217064
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1707317065
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17086,20 +17078,19 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1708617078
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
1708717079
; GFX90A-NEXT: s_cmp_lg_u64 s[4:5], 0
1708817080
; GFX90A-NEXT: s_cselect_b32 s4, s4, -1
17089-
; GFX90A-NEXT: v_mov_b32_e32 v6, s4
17090-
; GFX90A-NEXT: buffer_load_dword v0, v6, s[0:3], 0 offen
17091-
; GFX90A-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
17081+
; GFX90A-NEXT: v_mov_b32_e32 v0, s4
17082+
; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
17083+
; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4
1709217084
; GFX90A-NEXT: s_waitcnt vmcnt(1)
17093-
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
17085+
; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v4
1709417086
; GFX90A-NEXT: s_waitcnt vmcnt(0)
17095-
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17096-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
17097-
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
17098-
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
17099-
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
17100-
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17101-
; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen offset:4
17102-
; GFX90A-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
17087+
; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
17088+
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
17089+
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
17090+
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
17091+
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
17092+
; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
17093+
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
1710317094
; GFX90A-NEXT: .LBB221_6: ; %atomicrmw.phi
1710417095
; GFX90A-NEXT: ;;#ASMSTART
1710517096
; GFX90A-NEXT: ; use a[0:1]
@@ -17132,10 +17123,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1713217123
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
1713317124
; GFX950-NEXT: s_nop 1
1713417125
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
17135-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1713617126
; GFX950-NEXT: s_nop 1
17137-
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1713817127
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
17128+
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1713917129
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
1714017130
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1714117131
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
@@ -17159,11 +17149,11 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
1715917149
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
1716017150
; GFX950-NEXT: s_nop 1
1716117151
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
17162-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
1716317152
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
17164-
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
17153+
; GFX950-NEXT: s_nop 0
1716517154
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
1716617155
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
17156+
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
1716717157
; GFX950-NEXT: scratch_store_dwordx2 off, v[2:3], s0
1716817158
; GFX950-NEXT: .LBB221_6: ; %atomicrmw.phi
1716917159
; GFX950-NEXT: ;;#ASMSTART
@@ -17202,9 +17192,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1720217192
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
1720317193
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1720417194
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17205-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
17206-
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1720717195
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17196+
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1720817197
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] glc
1720917198
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1721017199
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17227,7 +17216,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1722717216
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1722817217
; GFX90A-NEXT: s_waitcnt vmcnt(0)
1722917218
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17230-
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1723117219
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
1723217220
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1723317221
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen
@@ -17263,10 +17251,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1726317251
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
1726417252
; GFX950-NEXT: s_nop 1
1726517253
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
17266-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
1726717254
; GFX950-NEXT: s_nop 1
17268-
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1726917255
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
17256+
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
1727017257
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
1727117258
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1727217259
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
@@ -17287,7 +17274,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
1728717274
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
1728817275
; GFX950-NEXT: s_nop 1
1728917276
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
17290-
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
1729117277
; GFX950-NEXT: s_nop 1
1729217278
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
1729317279
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc

0 commit comments

Comments
 (0)