Skip to content

Commit 38e7ebb

Browse files
Automerge: [AMDGPU][InsertWaitCnts][NFC] Merge VMEM_ACCESS and VMEM_READ_ACCESS into a single event type (#171973)
2 parents: 8fb3995 + e151434; commit: 38e7ebb

File tree

1 file changed

+7
-8
lines changed

1 file changed

+7
-8
lines changed

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 7 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -144,8 +144,7 @@ struct HardwareLimits {
144144
};
145145

146146
#define AMDGPU_DECLARE_WAIT_EVENTS(DECL) \
147-
DECL(VMEM_ACCESS) /* vmem read & write */ \
148-
DECL(VMEM_READ_ACCESS) /* vmem read */ \
147+
DECL(VMEM_ACCESS) /* vmem read & write (pre-gfx10), vmem read (gfx10+) */ \
149148
DECL(VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
150149
DECL(VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
151150
DECL(VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
@@ -369,8 +368,8 @@ class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
369368
assert(ST);
370369

371370
static const unsigned WaitEventMaskForInstPreGFX12[NUM_INST_CNTS] = {
372-
eventMask({VMEM_ACCESS, VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS,
373-
VMEM_BVH_READ_ACCESS}),
371+
eventMask(
372+
{VMEM_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS}),
374373
eventMask({SMEM_ACCESS, LDS_ACCESS, GDS_ACCESS, SQ_MESSAGE}),
375374
eventMask({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS,
376375
EXP_POS_ACCESS, EXP_LDS_ACCESS}),
@@ -403,7 +402,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
403402
assert(ST);
404403

405404
static const unsigned WaitEventMaskForInstGFX12Plus[NUM_INST_CNTS] = {
406-
eventMask({VMEM_ACCESS, VMEM_READ_ACCESS}),
405+
eventMask({VMEM_ACCESS}),
407406
eventMask({LDS_ACCESS, GDS_ACCESS}),
408407
eventMask({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS,
409408
EXP_POS_ACCESS, EXP_LDS_ACCESS}),
@@ -537,7 +536,7 @@ class SIInsertWaitcnts {
537536
switch (Inst.getOpcode()) {
538537
// FIXME: GLOBAL_INV needs to be tracked with xcnt too.
539538
case AMDGPU::GLOBAL_INV:
540-
return VMEM_READ_ACCESS; // tracked using loadcnt
539+
return VMEM_ACCESS; // tracked using loadcnt
541540
case AMDGPU::GLOBAL_WB:
542541
case AMDGPU::GLOBAL_WBINV:
543542
return VMEM_WRITE_ACCESS; // tracked using storecnt
@@ -547,7 +546,7 @@ class SIInsertWaitcnts {
547546

548547
// Maps VMEM access types to their corresponding WaitEventType.
549548
static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
550-
VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
549+
VMEM_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
551550

552551
assert(SIInstrInfo::isVMEM(Inst));
553552
// LDS DMA loads are also stores, but on the LDS side. On the VMEM side
@@ -561,7 +560,7 @@ class SIInsertWaitcnts {
561560
return VMEM_WRITE_ACCESS;
562561
}
563562
if (!ST->hasExtendedWaitCounts() || SIInstrInfo::isFLAT(Inst))
564-
return VMEM_READ_ACCESS;
563+
return VMEM_ACCESS;
565564
return VmemReadMapping[getVmemType(Inst)];
566565
}
567566

0 commit comments

Comments
 (0)