Skip to content

Commit f1fc507

Browse files
authored
[AMDGPU] w/a hazard with writing s102/103 and reading FLAT_SCRATCH_BASE (#153878)
1 parent 9f302ed commit f1fc507

File tree

4 files changed

+563
-0
lines changed

4 files changed

+563
-0
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1204,6 +1204,8 @@ void GCNHazardRecognizer::fixHazards(MachineInstr *MI) {
12041204
fixGetRegWaitIdle(MI);
12051205
if (ST.hasDsAtomicAsyncBarrierArriveB64PipeBug())
12061206
fixDsAtomicAsyncBarrierArriveB64(MI);
1207+
if (ST.hasScratchBaseForwardingHazard())
1208+
fixScratchBaseForwardingHazard(MI);
12071209
}
12081210

12091211
static bool isVCmpXWritesExec(const SIInstrInfo &TII, const SIRegisterInfo &TRI,
@@ -3468,3 +3470,79 @@ bool GCNHazardRecognizer::fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI) {
34683470

34693471
return true;
34703472
}
3473+
3474+
/// Work around the scratch-base forwarding hazard for \p MI.
///
/// If \p MI reads SRC_FLAT_SCRATCH_BASE_LO/HI (directly, or through an
/// s_getreg of the FLAT_SCR_LO/HI hardware register) and the matching SGPR
/// (SGPR102 for LO, SGPR103 for HI) was modified recently enough, an
/// S_WAITCNT_DEPCTR with the sa_sdst and va_sdst fields set to 0 is inserted
/// in front of \p MI.
///
/// \returns true if a wait instruction was inserted, false otherwise.
bool GCNHazardRecognizer::fixScratchBaseForwardingHazard(MachineInstr *MI) {
  // Pre-RA scheduling has nothing to check: the hazard can only trigger once
  // SGPRs have been allocated.
  if (!IsHazardRecognizerMode)
    return false;

  const SIInstrInfo *TII = ST.getInstrInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  // Hazard expires after 10 SGPR writes by SALU or 8 SGPR writes by VALU.
  const int FlatScrBaseWaitStates = 10;

  // Work out which halves of the flat scratch base this instruction reads.
  bool ReadsLo = MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_LO, TRI);
  bool ReadsHi = MI->readsRegister(AMDGPU::SRC_FLAT_SCRATCH_BASE_HI, TRI);
  if (isSGetReg(MI->getOpcode())) {
    // An s_getreg of the FLAT_SCR_LO/HI hardware register counts as a read.
    const auto HwReg = getHWReg(TII, *MI);
    if (HwReg == AMDGPU::Hwreg::ID_FLAT_SCR_LO)
      ReadsLo = true;
    else if (HwReg == AMDGPU::Hwreg::ID_FLAT_SCR_HI)
      ReadsHi = true;
  }

  // Returns true if a write to Reg is found before the hazard has expired
  // when walking backwards from MI.
  auto IsRegDefHazard = [&](Register Reg) -> bool {
    // The instruction we are looking for: any modification of Reg.
    auto WritesReg = [TRI, Reg](const MachineInstr &Inst) {
      return Inst.modifiesRegister(Reg, TRI);
    };

    // Not a real wait state count: the walk is fed 1 for every SALU/VALU
    // instruction that defines an SGPR and 0 for everything else, so the
    // accumulated value is a count of SGPR-writing instructions.
    auto CountSGPRWrite = [TII, TRI,
                           &MRI](const MachineInstr &Inst) -> unsigned {
      if (TII->isSALU(Inst) || TII->isVALU(Inst)) {
        for (const MachineOperand &Def : Inst.all_defs())
          if (TRI->isSGPRReg(MRI, Def.getReg()))
            return 1;
      }
      return 0;
    };

    // The search stops once enough SGPR writes were seen, or at an
    // S_WAITCNT_DEPCTR whose sa_sdst and va_sdst fields are both zero.
    auto Expired = [=](const MachineInstr &Inst, int SgprWrites) {
      if (SgprWrites >= FlatScrBaseWaitStates)
        return true;
      if (Inst.getOpcode() != AMDGPU::S_WAITCNT_DEPCTR)
        return false;
      const unsigned Wait = Inst.getOperand(0).getImm();
      return AMDGPU::DepCtr::decodeFieldSaSdst(Wait) == 0 &&
             AMDGPU::DepCtr::decodeFieldVaSdst(Wait) == 0;
    };

    DenseSet<const MachineBasicBlock *> Visited;
    const int WritesSinceDef = ::getWaitStatesSince(
        WritesReg, MI->getParent(), std::next(MI->getReverseIterator()), 0,
        Expired, Visited, CountSGPRWrite);
    return WritesSinceDef < FlatScrBaseWaitStates;
  };

  // A half is hazardous only if it is read, its SGPR is not a constant
  // physical register, and a recent enough write to that SGPR exists.
  const bool NeedsWait =
      (ReadsLo && !MRI.isConstantPhysReg(AMDGPU::SGPR102) &&
       IsRegDefHazard(AMDGPU::SGPR102)) ||
      (ReadsHi && !MRI.isConstantPhysReg(AMDGPU::SGPR103) &&
       IsRegDefHazard(AMDGPU::SGPR103));
  if (!NeedsWait)
    return false;

  // Insert the wait (sa_sdst = 0, va_sdst = 0) right before MI.
  const unsigned WaitImm = AMDGPU::DepCtr::encodeFieldVaSdst(
      AMDGPU::DepCtr::encodeFieldSaSdst(0), 0);
  BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
          TII->get(AMDGPU::S_WAITCNT_DEPCTR))
      .addImm(WaitImm);
  return true;
}

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
112112
bool fixRequiredExportPriority(MachineInstr *MI);
113113
bool fixGetRegWaitIdle(MachineInstr *MI);
114114
bool fixDsAtomicAsyncBarrierArriveB64(MachineInstr *MI);
115+
bool fixScratchBaseForwardingHazard(MachineInstr *MI);
115116

116117
int checkMAIHazards(MachineInstr *MI);
117118
int checkMAIHazards908(MachineInstr *MI);

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1821,6 +1821,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
18211821
bool hasDsAtomicAsyncBarrierArriveB64PipeBug() const {
18221822
return getGeneration() == GFX12;
18231823
}
1824+
1825+
// Requires s_wait_alu(0) after s102/s103 write and src_flat_scratch_base
1826+
// read.
1827+
bool hasScratchBaseForwardingHazard() const {
1828+
return GFX1250Insts && getGeneration() == GFX12;
1829+
}
18241830
};
18251831

18261832
class GCNUserSGPRUsageInfo {

0 commit comments

Comments
 (0)