Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1111,8 +1111,7 @@ void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
{N->getOperand(0), N->getOperand(1),
CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/});
} else {
unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO
: AMDGPU::S_USUBO_PSEUDO;
unsigned Opc = IsAdd ? AMDGPU::S_UADDO_PSEUDO : AMDGPU::S_USUBO_PSEUDO;

CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
{N->getOperand(0), N->getOperand(1)});
Expand Down
60 changes: 52 additions & 8 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6081,9 +6081,6 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineOperand &Src0 = MI.getOperand(2);
MachineOperand &Src1 = MI.getOperand(3);
MachineOperand &Src2 = MI.getOperand(4);
unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO)
? AMDGPU::S_ADDC_U32
: AMDGPU::S_SUBB_U32;
if (Src0.isReg() && TRI->isVectorRegister(MRI, Src0.getReg())) {
Register RegOp0 = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
BuildMI(*BB, MII, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), RegOp0)
Expand Down Expand Up @@ -6132,11 +6129,11 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.addImm(0);
}

// clang-format off
BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg())
.add(Src0)
.add(Src1);
// clang-format on
unsigned Opc = MI.getOpcode() == AMDGPU::S_ADD_CO_PSEUDO
? AMDGPU::S_ADDC_U32
: AMDGPU::S_SUBB_U32;

BuildMI(*BB, MII, DL, TII->get(Opc), Dest.getReg()).add(Src0).add(Src1);

unsigned SelOpc =
ST.isWave64() ? AMDGPU::S_CSELECT_B64 : AMDGPU::S_CSELECT_B32;
Expand Down Expand Up @@ -16579,6 +16576,53 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
}
}

// Eliminate setcc by using carryout from add/sub instruction

// LHS = ADD i64 RHS, Z          LHSlo = UADDO       i32 RHSlo, Zlo
// setcc LHS ult RHS     ->      LHSHi = UADDO_CARRY i32 RHShi, Zhi
// similarly for subtraction

// LHS = ADD i64 Y, 1            LHSlo = UADDO       i32 Ylo, 1
// setcc LHS eq 0        ->      LHSHi = UADDO_CARRY i32 Yhi, 0

if (VT == MVT::i64 && ((CC == ISD::SETULT &&
sd_match(LHS, m_Add(m_Specific(RHS), m_Value()))) ||
(CC == ISD::SETUGT &&
sd_match(LHS, m_Sub(m_Specific(RHS), m_Value()))) ||
(CC == ISD::SETEQ && CRHS && CRHS->isZero() &&
sd_match(LHS, m_Add(m_Value(), m_One()))))) {
bool IsAdd = LHS.getOpcode() == ISD::ADD;

SDValue Op0 = LHS.getOperand(0);
SDValue Op1 = LHS.getOperand(1);

SDValue Op0Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Op0);
SDValue Op1Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Op1);

SDValue Op0Hi = getHiHalf64(Op0, DAG);
SDValue Op1Hi = getHiHalf64(Op1, DAG);

SDValue NodeLo =
DAG.getNode(IsAdd ? ISD::UADDO : ISD::USUBO, SL,
DAG.getVTList(MVT::i32, MVT::i1), {Op0Lo, Op1Lo});

SDValue CarryInHi = NodeLo.getValue(1);
SDValue NodeHi = DAG.getNode(IsAdd ? ISD::UADDO_CARRY : ISD::USUBO_CARRY,
SL, DAG.getVTList(MVT::i32, MVT::i1),
{Op0Hi, Op1Hi, CarryInHi});

SDValue ResultLo = NodeLo.getValue(0);
SDValue ResultHi = NodeHi.getValue(0);

SDValue JoinedResult =
DAG.getBuildVector(MVT::v2i32, SL, {ResultLo, ResultHi});

SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, JoinedResult);
SDValue Overflow = NodeHi.getValue(1);
DCI.CombineTo(LHS.getNode(), Result);
return Overflow;
}

if (VT != MVT::f32 && VT != MVT::f64 &&
(!Subtarget->has16BitInsts() || VT != MVT::f16))
return SDValue();
Expand Down
98 changes: 42 additions & 56 deletions llvm/test/CodeGen/AMDGPU/a-v-flat-atomicrmw.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8760,9 +8760,8 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
Expand All @@ -8781,20 +8780,19 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
; GFX90A-NEXT: s_cbranch_execz .LBB113_6
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX90A-NEXT: v_cndmask_b32_e32 v4, -1, v4, vcc
; GFX90A-NEXT: buffer_load_dword v0, v4, s[0:3], 0 offen
; GFX90A-NEXT: buffer_load_dword v1, v4, s[0:3], 0 offen offset:4
; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v4, vcc
; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_waitcnt vmcnt(1)
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v6
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen offset:4
; GFX90A-NEXT: buffer_store_dword v2, v4, s[0:3], 0 offen
; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v7, vcc
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; GFX90A-NEXT: .LBB113_6: ; %atomicrmw.phi
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: ;;#ASMSTART
Expand Down Expand Up @@ -8828,10 +8826,9 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v6
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v7, vcc
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[4:5], v[0:3] sc0
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
Expand All @@ -8857,11 +8854,11 @@ define void @flat_atomic_usub_sat_i64_ret_a_a(ptr %ptr) #0 {
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v6
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v7, vcc
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
; GFX950-NEXT: scratch_store_dwordx2 v4, v[2:3], off
; GFX950-NEXT: .LBB113_6: ; %atomicrmw.phi
; GFX950-NEXT: s_or_b64 exec, exec, s[0:1]
Expand Down Expand Up @@ -8901,9 +8898,8 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], v[4:5], v[4:5] op_sel:[0,1]
; GFX90A-NEXT: v_sub_co_u32_e32 v4, vcc, v6, v2
; GFX90A-NEXT: v_subb_co_u32_e32 v5, vcc, v7, v3, vcc
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[4:5], v[6:7]
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v4, v4, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v5, v5, 0, vcc
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[4:5], v[0:1], v[4:7] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[4:5], v[6:7]
Expand All @@ -8919,18 +8915,17 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
; GFX90A-NEXT: s_cbranch_execz .LBB114_6
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
; GFX90A-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; GFX90A-NEXT: v_cndmask_b32_e32 v6, -1, v0, vcc
; GFX90A-NEXT: buffer_load_dword v4, v6, s[0:3], 0 offen
; GFX90A-NEXT: buffer_load_dword v5, v6, s[0:3], 0 offen offset:4
; GFX90A-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; GFX90A-NEXT: buffer_load_dword v4, v0, s[0:3], 0 offen
; GFX90A-NEXT: buffer_load_dword v5, v0, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_waitcnt vmcnt(1)
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v4, v2
; GFX90A-NEXT: v_sub_co_u32_e32 v1, vcc, v4, v2
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v3, vcc
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[4:5]
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX90A-NEXT: v_subb_co_u32_e32 v2, vcc, v5, v3, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen
; GFX90A-NEXT: buffer_store_dword v1, v6, s[0:3], 0 offen offset:4
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; GFX90A-NEXT: buffer_store_dword v2, v0, s[0:3], 0 offen offset:4
; GFX90A-NEXT: .LBB114_6: ; %atomicrmw.phi
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: ;;#ASMSTART
Expand Down Expand Up @@ -8963,10 +8958,9 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
Expand All @@ -8989,7 +8983,6 @@ define void @flat_atomic_usub_sat_i64_ret_av_av(ptr %ptr) #0 {
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
Expand Down Expand Up @@ -17065,9 +17058,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
Expand All @@ -17086,20 +17078,19 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
; GFX90A-NEXT: ; %bb.5: ; %atomicrmw.private
; GFX90A-NEXT: s_cmp_lg_u64 s[4:5], 0
; GFX90A-NEXT: s_cselect_b32 s4, s4, -1
; GFX90A-NEXT: v_mov_b32_e32 v6, s4
; GFX90A-NEXT: buffer_load_dword v0, v6, s[0:3], 0 offen
; GFX90A-NEXT: buffer_load_dword v1, v6, s[0:3], 0 offen offset:4
; GFX90A-NEXT: v_mov_b32_e32 v0, s4
; GFX90A-NEXT: buffer_load_dword v1, v0, s[0:3], 0 offen
; GFX90A-NEXT: buffer_load_dword v2, v0, s[0:3], 0 offen offset:4
; GFX90A-NEXT: s_waitcnt vmcnt(1)
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
; GFX90A-NEXT: v_sub_co_u32_e32 v3, vcc, v1, v4
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v0
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v3, 0, vcc
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v1
; GFX90A-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX90A-NEXT: buffer_store_dword v0, v6, s[0:3], 0 offen offset:4
; GFX90A-NEXT: buffer_store_dword v2, v6, s[0:3], 0 offen
; GFX90A-NEXT: v_subb_co_u32_e32 v4, vcc, v2, v5, vcc
; GFX90A-NEXT: v_accvgpr_write_b32 a0, v1
; GFX90A-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
; GFX90A-NEXT: v_accvgpr_write_b32 a1, v2
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v4, 0, vcc
; GFX90A-NEXT: buffer_store_dword v3, v0, s[0:3], 0 offen
; GFX90A-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen offset:4
; GFX90A-NEXT: .LBB221_6: ; %atomicrmw.phi
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use a[0:1]
Expand Down Expand Up @@ -17132,10 +17123,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v4
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v5, vcc
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[0:1], v[6:7], v[0:3] sc0
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[0:1], v[2:3]
Expand All @@ -17159,11 +17149,11 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_a_a(ptr inreg %ptr) #0 {
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v0, v4
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v1, v5, vcc
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
; GFX950-NEXT: v_accvgpr_write_b32 a0, v0
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
; GFX950-NEXT: s_nop 0
; GFX950-NEXT: v_cndmask_b32_e64 v3, v3, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v2, v2, 0, vcc
; GFX950-NEXT: v_accvgpr_write_b32 a1, v1
; GFX950-NEXT: scratch_store_dwordx2 off, v[2:3], s0
; GFX950-NEXT: .LBB221_6: ; %atomicrmw.phi
; GFX950-NEXT: ;;#ASMSTART
Expand Down Expand Up @@ -17202,9 +17192,8 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], v[2:3], v[2:3] op_sel:[0,1]
; GFX90A-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
; GFX90A-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
; GFX90A-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] glc
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
Expand All @@ -17227,7 +17216,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
; GFX90A-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX90A-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; GFX90A-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GFX90A-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX90A-NEXT: buffer_store_dword v0, v4, s[0:3], 0 offen
Expand Down Expand Up @@ -17263,10 +17251,9 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
; GFX950-NEXT: v_sub_co_u32_e32 v2, vcc, v8, v0
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_subb_co_u32_e32 v3, vcc, v9, v1, vcc
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[8:9]
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v6, v2, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v7, v3, 0, vcc
; GFX950-NEXT: flat_atomic_cmpswap_x2 v[2:3], v[4:5], v[6:9] sc0
; GFX950-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX950-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[8:9]
Expand All @@ -17287,7 +17274,6 @@ define void @flat_atomic_usub_sat_i64_saddr_ret_av_av(ptr inreg %ptr) #0 {
; GFX950-NEXT: v_sub_co_u32_e32 v0, vcc, v2, v0
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_subb_co_u32_e32 v1, vcc, v3, v1, vcc
; GFX950-NEXT: v_cmp_gt_u64_e32 vcc, v[0:1], v[2:3]
; GFX950-NEXT: s_nop 1
; GFX950-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc
; GFX950-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
Expand Down
Loading