@@ -144,8 +144,7 @@ struct HardwareLimits {
144144};
145145
146146#define AMDGPU_DECLARE_WAIT_EVENTS (DECL ) \
147- DECL (VMEM_ACCESS) /* vmem read & write */ \
148- DECL (VMEM_READ_ACCESS) /* vmem read */ \
147+ DECL (VMEM_ACCESS) /* vmem read & write (pre-gfx10), vmem read (gfx10+) */ \
149148 DECL (VMEM_SAMPLER_READ_ACCESS) /* vmem SAMPLER read (gfx12+ only) */ \
150149 DECL (VMEM_BVH_READ_ACCESS) /* vmem BVH read (gfx12+ only) */ \
151150 DECL (VMEM_WRITE_ACCESS) /* vmem write that is not scratch */ \
@@ -369,8 +368,8 @@ class WaitcntGeneratorPreGFX12 : public WaitcntGenerator {
369368 assert (ST);
370369
371370 static const unsigned WaitEventMaskForInstPreGFX12[NUM_INST_CNTS] = {
372- eventMask ({VMEM_ACCESS, VMEM_READ_ACCESS, VMEM_SAMPLER_READ_ACCESS,
373- VMEM_BVH_READ_ACCESS}),
371+ eventMask (
372+ {VMEM_ACCESS, VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS}),
374373 eventMask ({SMEM_ACCESS, LDS_ACCESS, GDS_ACCESS, SQ_MESSAGE}),
375374 eventMask ({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS,
376375 EXP_POS_ACCESS, EXP_LDS_ACCESS}),
@@ -403,7 +402,7 @@ class WaitcntGeneratorGFX12Plus : public WaitcntGenerator {
403402 assert (ST);
404403
405404 static const unsigned WaitEventMaskForInstGFX12Plus[NUM_INST_CNTS] = {
406- eventMask ({VMEM_ACCESS, VMEM_READ_ACCESS }),
405+ eventMask ({VMEM_ACCESS}),
407406 eventMask ({LDS_ACCESS, GDS_ACCESS}),
408407 eventMask ({EXP_GPR_LOCK, GDS_GPR_LOCK, VMW_GPR_LOCK, EXP_PARAM_ACCESS,
409408 EXP_POS_ACCESS, EXP_LDS_ACCESS}),
@@ -537,7 +536,7 @@ class SIInsertWaitcnts {
537536 switch (Inst.getOpcode ()) {
538537 // FIXME: GLOBAL_INV needs to be tracked with xcnt too.
539538 case AMDGPU::GLOBAL_INV:
540- return VMEM_READ_ACCESS ; // tracked using loadcnt
539+ return VMEM_ACCESS ; // tracked using loadcnt
541540 case AMDGPU::GLOBAL_WB:
542541 case AMDGPU::GLOBAL_WBINV:
543542 return VMEM_WRITE_ACCESS; // tracked using storecnt
@@ -547,7 +546,7 @@ class SIInsertWaitcnts {
547546
548547 // Maps VMEM access types to their corresponding WaitEventType.
549548 static const WaitEventType VmemReadMapping[NUM_VMEM_TYPES] = {
550- VMEM_READ_ACCESS , VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
549+ VMEM_ACCESS , VMEM_SAMPLER_READ_ACCESS, VMEM_BVH_READ_ACCESS};
551550
552551 assert (SIInstrInfo::isVMEM (Inst));
553552 // LDS DMA loads are also stores, but on the LDS side. On the VMEM side
@@ -561,7 +560,7 @@ class SIInsertWaitcnts {
561560 return VMEM_WRITE_ACCESS;
562561 }
563562 if (!ST->hasExtendedWaitCounts () || SIInstrInfo::isFLAT (Inst))
564- return VMEM_READ_ACCESS ;
563+ return VMEM_ACCESS ;
565564 return VmemReadMapping[getVmemType (Inst)];
566565 }
567566
0 commit comments