Skip to content

Commit 1becade

Browse files
authored
[AMDGPU] Update comments in memory legalizer. NFC (#160453)
1 parent 844150d commit 1becade

File tree

1 file changed

+14
-5
lines changed

1 file changed

+14
-5
lines changed

llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ class SIMemOpInfo final {
106106
bool IsLastUse = false;
107107
bool IsCooperative = false;
108108

109+
// TODO: Should we assume Cooperative=true if no MMO is present?
109110
SIMemOpInfo(
110111
const GCNSubtarget &ST,
111112
AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
@@ -338,6 +339,11 @@ class SICacheControl {
338339
bool IsNonTemporal,
339340
bool IsLastUse = false) const = 0;
340341

342+
/// Add final touches to a `mayStore` instruction \p MI, which may be a
343+
/// Store or RMW instruction.
344+
/// FIXME: This takes a MI because iterators aren't handled properly. When
345+
/// this is called, they often point to entirely different insts. Thus we back
346+
/// up the inst early and pass it here instead.
341347
virtual bool finalizeStore(MachineInstr &MI, bool Atomic) const {
342348
return false;
343349
};
@@ -2381,7 +2387,10 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI,
23812387
// which shares the same L0.
23822388
//
23832389
// GFX12.5:
2384-
// TODO DOCS
2390+
// CU$ has two ports. To ensure operations are visible at the workgroup
2391+
// level, we need to ensure all operations in this port have completed
2392+
// so the other SIMDs in the WG can see them. There is no ordering
2393+
// guarantee between the ports.
23852394
if (!ST.isCuModeEnabled() || ST.hasGFX1250Insts()) {
23862395
if ((Op & SIMemOp::LOAD) != SIMemOp::NONE)
23872396
LOADCnt |= true;
@@ -2496,8 +2505,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
24962505
// Otherwise in CU mode all waves of a work-group are on the same CU, and
24972506
// so the L0 does not need to be invalidated.
24982507
//
2499-
// GFX12.5
2500-
// TODO DOCS
2508+
// GFX12.5 has a shared WGP$, so no invalidates are required.
25012509
if (ST.isCuModeEnabled())
25022510
return false;
25032511

@@ -2541,7 +2549,8 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
25412549
++MI;
25422550

25432551
// global_wb is only necessary at system scope for GFX12.0,
2544-
// they're also necessary at device scope for GFX12.5.
2552+
// they're also necessary at device scope for GFX12.5 as stores
2553+
// cannot report completion earlier than L2.
25452554
//
25462555
// Emitting it for lower scopes is a slow no-op, so we omit it
25472556
// for performance.
@@ -2552,7 +2561,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
25522561
Changed = true;
25532562
break;
25542563
case SIAtomicScope::AGENT:
2555-
// TODO DOCS
2564+
// GFX12.5 may have >1 L2 per device so we must emit a device scope WB.
25562565
if (ST.hasGFX1250Insts()) {
25572566
BuildMI(MBB, MI, DL, TII->get(AMDGPU::GLOBAL_WB))
25582567
.addImm(AMDGPU::CPol::SCOPE_DEV);

0 commit comments

Comments
 (0)