diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 5e297c7540c48..dd7c1914d3440 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1204,6 +1204,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
     fixGetRegWaitIdle(MI);
   if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug())
     fixDsAtomicAsyncBarrierArriveB64(MI);
+  if (ST.hasScratchBaseForwardingHazard())
+    fixScratchBaseForwardingHazard(MI);
 }
 
 static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3468,3 +3470,90 @@ bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) {
 
   return true;
 }
+
+// Inserts an S_WAITCNT_DEPCTR with sa_sdst and va_sdst both zero in front of
+// MI when MI reads SRC_FLAT_SCRATCH_BASE_LO/HI (directly, or via S_GETREG of
+// FLAT_SCR_LO/HI) and SGPR102/SGPR103 respectively was modified fewer than
+// FlatScrBaseWaitStates counted SGPR writes before it. Returns true if the
+// wait was inserted.
+bool GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
+  // No reason to check this in pre-RA scheduling, SGPRs have to be allocated
+  // for hazard to trigger.
+  if (!IsHazardRecognizerMode)
+    return false;
+
+  const SIRegisterInfo *TRI = ST.getRegisterInfo();
+  const SIInstrInfo *TII = ST.getInstrInfo();
+  // Hazard expires after 10 SGPR writes by SALU or 8 SGPR writes by VALU.
+  const int FlatScrBaseWaitStates = 10;
+
+  bool ReadsFlatScrLo =
+      MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_LO, TRI);
+  bool ReadsFlatScrHi =
+      MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, TRI);
+  // An S_GETREG of the FLAT_SCR_LO/HI hardware register is treated as a read
+  // of the corresponding half as well.
+  if (isSGetReg(MI->getOpcode())) {
+    switch (getHWReg(TII, *MI)) {
+    default:
+      break;
+    case AMDGPU::Hwreg::ID_FLAT_SCR_LO:
+      ReadsFlatScrLo = true;
+      break;
+    case AMDGPU::Hwreg::ID_FLAT_SCR_HI:
+      ReadsFlatScrHi = true;
+      break;
+    }
+  }
+
+  const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  auto IsRegDefHazard = [&](Register Reg) -> bool {
+    DenseSet<const MachineBasicBlock *> Visited;
+    auto IsHazardFn = [TRI, Reg](const MachineInstr &MI) {
+      return MI.modifiesRegister(Reg, TRI);
+    };
+
+    // This literally abuses the idea of waitstates. Instead of waitstates it
+    // returns 1 for SGPR written and 0 otherwise.
+    auto IsSGPRDef = [TII, TRI, &MRI](const MachineInstr &MI) -> unsigned {
+      if (!TII->isSALU(MI) && !TII->isVALU(MI))
+        return 0;
+      for (const MachineOperand &MO : MI.all_defs()) {
+        if (TRI->isSGPRReg(MRI, MO.getReg()))
+          return 1;
+      }
+      return 0;
+    };
+
+    // An earlier S_WAITCNT_DEPCTR whose sa_sdst and va_sdst fields are both
+    // zero ends the search, as does reaching the SGPR write limit.
+    auto IsExpiredFn = [=](const MachineInstr &MI, int SgprWrites) {
+      if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR) {
+        unsigned Wait = MI.getOperand(0).getImm();
+        if (AMDGPU::DepCtr::decodeFieldSaSdst(Wait) == 0 &&
+            AMDGPU::DepCtr::decodeFieldVaSdst(Wait) == 0)
+          return true;
+      }
+      return SgprWrites >= FlatScrBaseWaitStates;
+    };
+
+    return ::getWaitStatesSince(
+               IsHazardFn, MI->getParent(), std::next(MI->getReverseIterator()),
+               0, IsExpiredFn, Visited, IsSGPRDef) < FlatScrBaseWaitStates;
+  };
+
+  // Bail out unless, for a half that MI reads, the backing SGPR is not a
+  // constant physical register and was modified within the hazard window.
+  if ((!ReadsFlatScrLo || MRI.isConstantPhysReg(AMDGPU::SGPR102) ||
+       !IsRegDefHazard(AMDGPU::SGPR102)) &&
+      (!ReadsFlatScrHi || MRI.isConstantPhysReg(AMDGPU::SGPR103) ||
+       !IsRegDefHazard(AMDGPU::SGPR103)))
+    return false;
+
+  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
+          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
+      .addImm(AMDGPU::DepCtr::encodeFieldVaSdst(
+          AMDGPU::DepCtr::encodeFieldSaSdst(0), 0));
+  return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index 890d5cbd154d6..e0982b46424b9 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -112,6 +112,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
   bool fixRequiredExportPriority(MachineInstr *MI);
   bool fixGetRegWaitIdle(MachineInstr *MI);
   bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
+  bool fixScratchBaseForwardingHazard(MachineInstr *MI);
 
   int checkMAIHazards(MachineInstr *MI);
   int checkMAIHazards908(MachineInstr *MI);
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 436f5c0801fad..404a476a3076a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++
b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1821,6 +1821,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {
     return getGeneration() == GFX12;
   }
+
+  // Requires s_wait_alu(0) between an s102/s103 write and a subsequent
+  // src_flat_scratch_base read.
+  bool hasScratchBaseForwardingHazard() const {
+    return GFX1250Insts && getGeneration() == GFX12;
+  }
 };
 
 class GCNUserSGPRUsageInfo {
diff --git a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
index f1dbabf1e1a83..f4596b0832d97 100644
--- a/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/hazards-gfx1250.mir
@@ -15,3 +15,481 @@ body: |
     ; GCN-NEXT: S_WAITCNT_DEPCTR 65507
     DS_ATOMIC_ASYNC_BARRIER_ARRIVE_B64 $vgpr1, 0, 0, implicit-def $asynccnt, implicit $asynccnt, implicit $exec
 ...
+
+---
+name: write_s102_read_flat_scr_base_lo # s102 write then base_lo read: wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s103_read_flat_scr_base_hi # s103 write then base_hi read: wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr103 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base # s102 write then 64-bit base read: wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base
+    ; GCN: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+    $sgpr102 = S_MOV_B32 0
+    $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+...
+
+---
+name: write_s103_read_flat_scr_base # s103 write then 64-bit base read: wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base
+    ; GCN: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+    $sgpr103 = S_MOV_B32 0
+    $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+...
+
+---
+name: write_s102_s103_read_flat_scr_base # s102_s103 pair write then 64-bit base read: wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s102_s103_read_flat_scr_base
+    ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+    $sgpr102_sgpr103 = S_MOV_B64 0
+    $sgpr0_sgpr1 = S_MOV_B64 $src_flat_scratch_base
+...
+
+---
+name: write_s102_getreg_flat_scr_base_lo # s102 write then S_GETREG_B32 20: wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_getreg_flat_scr_base_lo
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr1 = S_GETREG_B32 20, implicit $mode
+    $sgpr102 = S_MOV_B32 0
+    $sgpr1 = S_GETREG_B32 20, implicit $mode
+...
+
+---
+name: write_s103_getreg_flat_scr_base_hi # s103 write then S_GETREG_B32 21: wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_getreg_flat_scr_base_hi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode
+    $sgpr103 = S_MOV_B32 0
+    $sgpr1 = S_GETREG_B32 21, implicit $mode
+...
+
+---
+name: write_s102_s103_getreg_flat_scr_base_hi # s102_s103 pair write then S_GETREG_B32 21: wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    ; GCN-LABEL: name: write_s102_s103_getreg_flat_scr_base_hi
+    ; GCN: $sgpr102_sgpr103 = S_MOV_B64 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $sgpr1 = S_GETREG_B32 21, implicit $mode
+    $sgpr102_sgpr103 = S_MOV_B64 0
+    $sgpr1 = S_GETREG_B32 21, implicit $mode
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_9_salu_valu # 9 SALU/VALU SGPR writes since the s102 def: wait still expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_9_salu_valu
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_10_salu_valu_expired # 10 SALU/VALU SGPR writes since the s102 def: no wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_10_salu_valu_expired
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $sgpr10 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s103_read_flat_scr_base_hi_9_salu_valu # 9 SALU/VALU SGPR writes since the s103 def: wait still expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi_9_salu_valu
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr103 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s103_read_flat_scr_base_hi_10_salu_valu_expired # 10 SALU/VALU SGPR writes since the s103 def: no wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s103_read_flat_scr_base_hi_10_salu_valu_expired
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr2_sgpr3 = S_MOV_B64 0
+    ; GCN-NEXT: $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    ; GCN-NEXT: $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc_lo, implicit $vcc_lo, implicit $exec
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr103 = S_MOV_B32 0
+    $sgpr0 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2_sgpr3 = S_MOV_B64 0
+    $vgpr1, $vcc_lo = V_ADDC_U32_e64 0, $vgpr0, $sgpr0, 0, implicit $exec
+    $vgpr2 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
+    ; NOP does not count because it does not write SGPRs
+    S_NOP 0
+    ; DS_READ_B32 does not count because it is not SALU or VALU
+    $vgpr3 = DS_READ_B32 $vgpr0, 0, 0, implicit $m0, implicit $exec
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    ; S_LOAD_DWORDX2_IMM does not count because it is not SALU
+    $sgpr8_sgpr9 = S_LOAD_DWORDX2_IMM renamable $sgpr4_sgpr5, 0, 0
+    $sgpr10 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_hi_no_hazard # s102 write but base_hi read: no wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_hi_no_hazard
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_expired_by_wait0 # existing S_WAITCNT_DEPCTR 0 after the write: no extra wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_expired_by_wait0
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 0
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 0
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_expired_by_wait_vs_sdst_sa_sdst # existing S_WAITCNT_DEPCTR 61950 after the write: no extra wait expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_expired_by_wait_vs_sdst_sa_sdst
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 61950
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_va_sdst_only # existing S_WAITCNT_DEPCTR 61951 is not sufficient: wait still expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_va_sdst_only
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61951
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 61951
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_sa_sdst_only # existing S_WAITCNT_DEPCTR 65534 is not sufficient: wait still expected
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_not_expired_by_wait_sa_sdst_only
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 65534
+    ; GCN-NEXT: S_NOP 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    S_WAITCNT_DEPCTR 65534
+    S_NOP 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_write_s103_read_flat_scr_base_lo_read_flat_scr_base_hi # one wait before the lo read; none expected before the following hi read
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0
+    ; GCN-LABEL: name: write_s102_write_s103_read_flat_scr_base_lo_read_flat_scr_base_hi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{ $}}
+    ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+    ; GCN-NEXT: $sgpr103 = S_MOV_B32 0
+    ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+    ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    ; GCN-NEXT: $vgpr1 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+    $sgpr102 = S_MOV_B32 0
+    $sgpr103 = S_MOV_B32 0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $src_flat_scratch_base_hi, $vgpr0, implicit $exec
+...
+
+---
+name: write_s102_read_flat_scr_base_lo_cross_blocks # s102 written in bb.0 and bb.1, base_lo read in bb.2: wait expected in bb.2
+tracksRegLiveness: true
+body: |
+  ; GCN-LABEL: name: write_s102_read_flat_scr_base_lo_cross_blocks
+  ; GCN: bb.0:
+  ; GCN-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; GCN-NEXT: liveins: $vgpr0, $sgpr0
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: $sgpr102 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+  ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr2 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr3 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
+  ; GCN-NEXT: S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN-NEXT: S_BRANCH %bb.1
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.1:
+  ; GCN-NEXT: successors: %bb.2(0x80000000)
+  ; GCN-NEXT: liveins: $vgpr0
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: $sgpr102 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr2 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr3 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
+  ; GCN-NEXT: $sgpr9 = S_MOV_B32 0
+  ; GCN-NEXT: S_BRANCH %bb.2
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: bb.2:
+  ; GCN-NEXT: liveins: $vgpr0
+  ; GCN-NEXT: {{ $}}
+  ; GCN-NEXT: S_WAITCNT_DEPCTR 61950
+  ; GCN-NEXT: $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+  bb.0:
+    liveins: $vgpr0, $sgpr0
+    $sgpr102 = S_ADD_U32 $sgpr0, 0, implicit-def $scc
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2 = S_MOV_B32 0
+    $sgpr3 = S_MOV_B32 0
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    $sgpr8 = S_MOV_B32 0
+    S_CBRANCH_SCC0 %bb.2, implicit $scc
+    S_BRANCH %bb.1
+
+  bb.1:
+    liveins: $vgpr0
+    $sgpr102 = S_MOV_B32 0
+    $sgpr1 = S_MOV_B32 0
+    $sgpr2 = S_MOV_B32 0
+    $sgpr3 = S_MOV_B32 0
+    $sgpr4 = S_MOV_B32 0
+    $sgpr5 = S_MOV_B32 0
+    $sgpr6 = S_MOV_B32 0
+    $sgpr7 = S_MOV_B32 0
+    $sgpr8 = S_MOV_B32 0
+    $sgpr9 = S_MOV_B32 0
+    S_BRANCH %bb.2
+
+  bb.2:
+    liveins: $vgpr0
+    $vgpr0 = V_ADD_U32_e32 $src_flat_scratch_base_lo, $vgpr0, implicit $exec
+...