Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions llvm/lib/Target/AMDGPU/AMDGPUIGroupLP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2420,29 +2420,29 @@ bool SchedGroup::canAddMI(const MachineInstr &MI) const {
Result = true;

else if (((SGMask & SchedGroupMask::VMEM) != SchedGroupMask::NONE) &&
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
TII->isVMEM(MI))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I right in thinking that the isDS test was redundant, since no instructions are both FLAT and DS? @kerbowa

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Otherwise, it should check for !isLDSDMA if we specifically want to avoid DS accesses, I think

Result = true;

else if (((SGMask & SchedGroupMask::VMEM_READ) != SchedGroupMask::NONE) &&
MI.mayLoad() &&
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
TII->isVMEM(MI))
Result = true;

else if (((SGMask & SchedGroupMask::VMEM_WRITE) != SchedGroupMask::NONE) &&
MI.mayStore() &&
(TII->isVMEM(MI) || (TII->isFLAT(MI) && !TII->isDS(MI))))
TII->isVMEM(MI))
Result = true;

else if (((SGMask & SchedGroupMask::DS) != SchedGroupMask::NONE) &&
TII->isDS(MI))
(TII->isDS(MI) || TII->isLDSDMA(MI)))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How is this change related to changing the definition of isVMEM??

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, it's not. I think it was just bothering me as I was going through the instruction detection. I should probably move this to another PR or just let it be.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

Result = true;

else if (((SGMask & SchedGroupMask::DS_READ) != SchedGroupMask::NONE) &&
MI.mayLoad() && TII->isDS(MI))
MI.mayLoad() && (TII->isDS(MI) || TII->isLDSDMA(MI)))
Result = true;

else if (((SGMask & SchedGroupMask::DS_WRITE) != SchedGroupMask::NONE) &&
MI.mayStore() && TII->isDS(MI))
MI.mayStore() && (TII->isDS(MI) || TII->isLDSDMA(MI)))
Result = true;

else if (((SGMask & SchedGroupMask::TRANS) != SchedGroupMask::NONE) &&
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/AMDGPUWaitSGPRHazards.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ class AMDGPUWaitSGPRHazards {
State.ActiveFlat = true;

// SMEM or VMEM (excluding FLAT, which may go through a VALU) clears hazards
if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isSMRD(*MI)) {
// FIXME: adapt to add FLAT without VALU (so !isLDSDMA())?
if ((SIInstrInfo::isVMEM(*MI) && !SIInstrInfo::isFLAT(*MI)) || SIInstrInfo::isSMRD(*MI)) {
State.VCCHazard = HazardState::None;
State.SALUHazards.reset();
State.VALUHazards.reset();
Expand Down
49 changes: 20 additions & 29 deletions llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,7 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (ST.hasNoDataDepHazard())
return NoHazard;

// FIXME: Should flat be considered vmem?
if ((SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI))
&& checkVMEMHazards(MI) > 0)
if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
return HazardType;

if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
Expand All @@ -202,8 +199,8 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return HazardType;

if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
SIInstrInfo::isDS(*MI) || SIInstrInfo::isEXP(*MI)) &&
checkMAIVALUHazards(MI) > 0)
return HazardType;

if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
Expand All @@ -230,7 +227,6 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return HazardType;

if ((SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI)) && checkMAILdStHazards(MI) > 0)
return HazardType;

Expand Down Expand Up @@ -324,7 +320,7 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
if (ST.hasNoDataDepHazard())
return WaitStates;

if (SIInstrInfo::isVMEM(*MI) || SIInstrInfo::isFLAT(*MI))
if (SIInstrInfo::isVMEM(*MI))
WaitStates = std::max(WaitStates, checkVMEMHazards(MI));

if (SIInstrInfo::isVALU(*MI))
Expand All @@ -340,8 +336,8 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));

if ((SIInstrInfo::isVALU(*MI) || SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) || SIInstrInfo::isDS(*MI) ||
SIInstrInfo::isEXP(*MI)) && checkMAIVALUHazards(MI) > 0)
SIInstrInfo::isDS(*MI) || SIInstrInfo::isEXP(*MI)) &&
checkMAIVALUHazards(MI) > 0)
WaitStates = std::max(WaitStates, checkMAIVALUHazards(MI));

if (MI->isInlineAsm())
Expand Down Expand Up @@ -370,7 +366,6 @@ unsigned GCNHazardRecognizer::PreEmitNoopsCommon(MachineInstr *MI) {
return std::max(WaitStates, checkMAIHazards(MI));

if (SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI))
return std::max(WaitStates, checkMAILdStHazards(MI));

Expand Down Expand Up @@ -598,7 +593,7 @@ static bool breaksSMEMSoftClause(MachineInstr *MI) {
}

/// Return true if \p MI terminates a VMEM soft clause.
/// isVMEM() now covers FLAT as well, so a separate isFLAT() check is no
/// longer needed: any non-VMEM instruction breaks the clause.
static bool breaksVMEMSoftClause(MachineInstr *MI) {
  return !SIInstrInfo::isVMEM(*MI);
}

int GCNHazardRecognizer::checkSoftClauseHazards(MachineInstr *MEM) {
Expand Down Expand Up @@ -1250,8 +1245,7 @@ bool GCNHazardRecognizer::fixVMEMtoScalarWriteHazards(MachineInstr *MI) {
const SIRegisterInfo *TRI = ST.getRegisterInfo();

auto IsHazardFn = [TRI, MI](const MachineInstr &I) {
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I) &&
!SIInstrInfo::isFLAT(I))
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I))
return false;

for (const MachineOperand &Def : MI->defs()) {
Expand Down Expand Up @@ -1424,9 +1418,8 @@ static bool shouldRunLdsBranchVmemWARHazardFixup(const MachineFunction &MF,
bool HasVmem = false;
for (auto &MBB : MF) {
for (auto &MI : MBB) {
HasLds |= SIInstrInfo::isDS(MI);
HasVmem |=
SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI);
HasLds |= SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI);
HasVmem |= SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI);
if (HasLds && HasVmem)
return true;
}
Expand All @@ -1448,9 +1441,9 @@ bool GCNHazardRecognizer::fixLdsBranchVmemWARHazard(MachineInstr *MI) {
assert(!ST.hasExtendedWaitCounts());

auto IsHazardInst = [](const MachineInstr &MI) {
if (SIInstrInfo::isDS(MI))
if (SIInstrInfo::isDS(MI) || SIInstrInfo::isLDSDMA(MI))
return 1;
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI))
return 2;
return 0;
};
Expand Down Expand Up @@ -1517,8 +1510,8 @@ bool GCNHazardRecognizer::fixLdsDirectVALUHazard(MachineInstr *MI) {
if (WaitStates >= NoHazardWaitStates)
return true;
// Instructions which cause va_vdst==0 expire hazard
return SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I);
return SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I);
};
auto GetWaitStatesFn = [](const MachineInstr &MI) {
return SIInstrInfo::isVALU(MI) ? 1 : 0;
Expand Down Expand Up @@ -1549,8 +1542,7 @@ bool GCNHazardRecognizer::fixLdsDirectVMEMHazard(MachineInstr *MI) {
const Register VDSTReg = VDST->getReg();

auto IsHazardFn = [this, VDSTReg](const MachineInstr &I) {
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I) &&
!SIInstrInfo::isDS(I))
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isDS(I))
return false;
return I.readsRegister(VDSTReg, &TRI) || I.modifiesRegister(VDSTReg, &TRI);
};
Expand Down Expand Up @@ -1635,8 +1627,8 @@ bool GCNHazardRecognizer::fixVALUPartialForwardingHazard(MachineInstr *MI) {
return HazardExpired;

// Instructions which cause va_vdst==0 expire hazard
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
AMDGPU::DepCtr::decodeFieldVaVdst(I.getOperand(0).getImm()) == 0))
return HazardExpired;
Expand Down Expand Up @@ -1772,8 +1764,8 @@ bool GCNHazardRecognizer::fixVALUTransUseHazard(MachineInstr *MI) {
return HazardExpired;

// Instructions which cause va_vdst==0 expire hazard
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isFLAT(I) ||
SIInstrInfo::isDS(I) || SIInstrInfo::isEXP(I) ||
if (SIInstrInfo::isVMEM(I) || SIInstrInfo::isDS(I) ||
SIInstrInfo::isEXP(I) ||
(I.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
I.getOperand(0).getImm() == 0x0fff))
return HazardExpired;
Expand Down Expand Up @@ -2003,7 +1995,7 @@ int GCNHazardRecognizer::checkFPAtomicToDenormModeHazard(MachineInstr *MI) {
return 0;

auto IsHazardFn = [](const MachineInstr &I) {
if (!SIInstrInfo::isVMEM(I) && !SIInstrInfo::isFLAT(I))
if (!SIInstrInfo::isVMEM(I))
return false;
return SIInstrInfo::isFPAtomic(I);
};
Expand Down Expand Up @@ -2626,7 +2618,6 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
int WaitStatesNeeded = 0;

bool IsMem = SIInstrInfo::isVMEM(*MI) ||
SIInstrInfo::isFLAT(*MI) ||
SIInstrInfo::isDS(*MI);
bool IsMemOrExport = IsMem || SIInstrInfo::isEXP(*MI);
bool IsVALU = SIInstrInfo::isVALU(*MI);
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,8 @@ void AMDGPUCustomBehaviour::generateWaitCntInfo() {
/// Return true if \p MCID describes a VMEM-class instruction.
/// Kept in sync with SIInstrInfo::isVMEM(), which now classifies FLAT
/// instructions as VMEM alongside MUBUF/MTBUF/MIMG.
bool AMDGPUCustomBehaviour::isVMEM(const MCInstrDesc &MCID) {
  return MCID.TSFlags & SIInstrFlags::MUBUF ||
         MCID.TSFlags & SIInstrFlags::MTBUF ||
         MCID.TSFlags & SIInstrFlags::MIMG ||
         MCID.TSFlags & SIInstrFlags::FLAT;
}

// taken from SIInstrInfo::hasModifiersSet()
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ FunctionPass *llvm::createSIFormMemoryClausesLegacyPass() {
}

/// Return true if \p MI may participate in a VMEM memory clause.
/// isVMEM() now includes FLAT, so the former explicit isFLAT() check is
/// subsumed.
static bool isVMEMClauseInst(const MachineInstr &MI) {
  return SIInstrInfo::isVMEM(MI);
}

static bool isSMEMClauseInst(const MachineInstr &MI) {
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class SIInsertHardClauses {
HardClauseType getHardClauseType(const MachineInstr &MI) {
if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI)) {
if (ST->hasNSAClauseBug()) {
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
Expand All @@ -121,7 +121,7 @@ class SIInsertHardClauses {
: HARDCLAUSE_MIMG_LOAD
: HARDCLAUSE_MIMG_STORE;
}
if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
if (SIInstrInfo::isVMEM(MI) && !SIInstrInfo::isLDSDMA(MI)) {
return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
: HARDCLAUSE_VMEM_LOAD
: HARDCLAUSE_VMEM_STORE;
Expand Down
14 changes: 7 additions & 7 deletions llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ static const unsigned instrsForExtendedCounterTypes[NUM_EXTENDED_INST_CNTS] = {
AMDGPU::S_WAIT_KMCNT};

/// Return true if \p Inst only updates VM_CNT (never LGKM_CNT).
/// Generic FLAT may also touch LDS (LGKM_CNT), so it is excluded here even
/// though isVMEM() now covers FLAT; global/scratch-specific FLAT variants
/// cannot access LDS and are safe to include.
static bool updateVMCntOnly(const MachineInstr &Inst) {
  return (SIInstrInfo::isVMEM(Inst) && !SIInstrInfo::isFLAT(Inst)) ||
         SIInstrInfo::isFLATGlobal(Inst) || SIInstrInfo::isFLATScratch(Inst);
}

#ifndef NDEBUG
Expand Down Expand Up @@ -695,14 +695,14 @@ class SIInsertWaitcnts {
#endif // NDEBUG
}

// Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM or
// FLAT instruction.
// Return the appropriate VMEM_*_ACCESS type for Inst, which must be a VMEM
// instruction.
WaitEventType getVmemWaitEventType(const MachineInstr &Inst) const {
// Maps VMEM access types to their corresponding WaitEventType.
static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};

assert(SIInstrInfo::isVMEM(Inst) || SIInstrInfo::isFLAT(Inst));
assert(SIInstrInfo::isVMEM(Inst));
// LDS DMA loads are also stores, but on the LDS side. On the VMEM side
// these should use VM_CNT.
if (!ST->hasVscnt() || SIInstrInfo::mayWriteLDSThroughDMA(Inst))
Expand Down Expand Up @@ -2454,8 +2454,8 @@ bool SIInsertWaitcnts::isPreheaderToFlush(
}

/// Return true if \p MI may access VMEM.
/// For FLAT instructions, defer to mayAccessVMEMThroughFlat(): a FLAT access
/// statically known to target only LDS does not touch VMEM. All other
/// VMEM-class instructions (MUBUF/MTBUF/MIMG) always access VMEM.
bool SIInsertWaitcnts::isVMEMOrFlatVMEM(const MachineInstr &MI) const {
  if (SIInstrInfo::isFLAT(MI))
    return mayAccessVMEMThroughFlat(MI);
  return SIInstrInfo::isVMEM(MI);
}

// Return true if it is better to flush the vmcnt counter in the preheader of
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -449,7 +449,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
}

/// Return true if \p MI is a VMEM-class instruction (MUBUF, MTBUF, image, or
/// FLAT). FLAT is included because all FLAT encodings wait on VM_CNT; the
/// assertion documents and enforces that invariant.
static bool isVMEM(const MachineInstr &MI) {
  assert((!isFLAT(MI) || usesVM_CNT(MI)) &&
         "FLAT instructions are expected to use VM_CNT");
  return isMUBUF(MI) || isMTBUF(MI) || isImage(MI) || isFLAT(MI);
}

bool isVMEM(uint16_t Opcode) const {
Expand Down
21 changes: 15 additions & 6 deletions llvm/test/CodeGen/AMDGPU/hard-clauses.mir
Original file line number Diff line number Diff line change
Expand Up @@ -630,20 +630,29 @@ body: |
; CHECK-LABEL: name: flat_global_load
; CHECK: liveins: $vgpr0_vgpr1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; CHECK-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
; CHECK-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
; CHECK-NEXT: S_CLAUSE 1
; CHECK-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; CHECK-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
; CHECK-NEXT: }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is bad. FLAT and GLOBAL instructions can't be mixed in a clause (at least by GFX10 rules).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok, I thought that it's no problem here since they both access the same address and thus the FLAT_LOAD is actually a GLOBAL_LOAD, too. (Or am I mistaken?)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is a problem. There are rules for what types of instruction can be claused together, and this pass has to respect the rules. That is why I think this patch should be NFC. Any behavioral changes can be discussed separately, in separate PRs, to see if they are OK or not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup, I already changed this PR to NFC. Just wanted to verify my understanding of this test

;
; GFX11-LABEL: name: flat_global_load
; GFX11: liveins: $vgpr0_vgpr1
; GFX11-NEXT: {{ $}}
; GFX11-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX11-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
; GFX11-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
; GFX11-NEXT: S_CLAUSE 1
; GFX11-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX11-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
; GFX11-NEXT: }
;
; GFX12-LABEL: name: flat_global_load
; GFX12: liveins: $vgpr0_vgpr1
; GFX12-NEXT: {{ $}}
; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX12-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
; GFX12-NEXT: BUNDLE implicit-def $vgpr2, implicit-def $vgpr2_lo16, implicit-def $vgpr2_hi16, implicit-def $vgpr3, implicit-def $vgpr3_lo16, implicit-def $vgpr3_hi16, implicit $vgpr0_vgpr1, implicit $exec, implicit $flat_scr {
; GFX12-NEXT: S_CLAUSE 1
; GFX12-NEXT: $vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
; GFX12-NEXT: $vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
; GFX12-NEXT: }
$vgpr2 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
$vgpr3 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 4, 0, implicit $exec, implicit $flat_scr
...
Expand Down
7 changes: 5 additions & 2 deletions llvm/test/CodeGen/AMDGPU/lds-branch-vmem-hazard.mir
Original file line number Diff line number Diff line change
Expand Up @@ -269,11 +269,14 @@ body: |
S_ENDPGM 0
...

# GCN-LABEL: name: no_hazard_lds_branch_flat
# FLAT_* instructions are "based on per-thread address (VGPR), can load/store:
# global memory, LDS or scratch memory" (RDNA4 ISA)
# GCN-LABEL: name: hazard_lds_branch_flat
# GCN: bb.1:
# GFX10-NEXT: S_WAITCNT_VSCNT undef $sgpr_null, 0
# GCN-NEXT: FLAT_LOAD_DWORD
---
name: no_hazard_lds_branch_flat
name: hazard_lds_branch_flat
body: |
bb.0:
successors: %bb.1
Expand Down
Loading