@@ -299,6 +299,10 @@ class SICacheControl {
299299 bool enableNamedBit (const MachineBasicBlock::iterator MI,
300300 AMDGPU::CPol::CPol Bit) const ;
301301
302+ /// Check whether an atomic operation on \p AS can affect memory that is
303+ /// accessible via the global address space.
304+ bool canAffectGlobalAddrSpace (SIAtomicAddrSpace AS) const ;
305+
302306public:
303307
304308 // / Create a cache control for the subtarget \p ST.
@@ -991,6 +995,15 @@ bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
991995 return true ;
992996}
993997
998+ bool SICacheControl::canAffectGlobalAddrSpace (SIAtomicAddrSpace AS) const {
999+ assert ((!ST.hasGloballyAddressableScratch () ||
1000+ (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
1001+ (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
1002+ " scratch instructions should already be replaced by flat "
1003+ " instructions if GloballyAddressableScratch is enabled" );
1004+ return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
1005+ }
1006+
9941007/* static */
9951008std::unique_ptr<SICacheControl> SICacheControl::create (const GCNSubtarget &ST) {
9961009 GCNSubtarget::Generation Generation = ST.getGeneration ();
@@ -1016,7 +1029,7 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
10161029 assert (MI->mayLoad () && !MI->mayStore ());
10171030 bool Changed = false ;
10181031
1019- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1032+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
10201033 switch (Scope) {
10211034 case SIAtomicScope::SYSTEM:
10221035 case SIAtomicScope::AGENT:
@@ -1239,7 +1252,7 @@ bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
12391252 if (Pos == Position::AFTER)
12401253 ++MI;
12411254
1242- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1255+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
12431256 switch (Scope) {
12441257 case SIAtomicScope::SYSTEM:
12451258 case SIAtomicScope::AGENT:
@@ -1299,7 +1312,7 @@ bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
12991312 if (Pos == Position::AFTER)
13001313 ++MI;
13011314
1302- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1315+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
13031316 switch (Scope) {
13041317 case SIAtomicScope::SYSTEM:
13051318 case SIAtomicScope::AGENT:
@@ -1336,7 +1349,7 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass(
13361349 assert (MI->mayLoad () && !MI->mayStore ());
13371350 bool Changed = false ;
13381351
1339- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1352+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
13401353 switch (Scope) {
13411354 case SIAtomicScope::SYSTEM:
13421355 case SIAtomicScope::AGENT:
@@ -1378,7 +1391,7 @@ bool SIGfx90ACacheControl::enableRMWCacheBypass(
13781391 assert (MI->mayLoad () && MI->mayStore ());
13791392 bool Changed = false ;
13801393
1381- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1394+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
13821395 switch (Scope) {
13831396 case SIAtomicScope::SYSTEM:
13841397 case SIAtomicScope::AGENT:
@@ -1487,7 +1500,7 @@ bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
14871500 if (Pos == Position::AFTER)
14881501 ++MI;
14891502
1490- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1503+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
14911504 switch (Scope) {
14921505 case SIAtomicScope::SYSTEM:
14931506 // Ensures that following loads will not see stale remote VMEM data or
@@ -1551,7 +1564,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
15511564 if (Pos == Position::AFTER)
15521565 ++MI;
15531566
1554- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1567+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
15551568 switch (Scope) {
15561569 case SIAtomicScope::SYSTEM:
15571570 // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
@@ -1594,7 +1607,7 @@ bool SIGfx940CacheControl::enableLoadCacheBypass(
15941607 assert (MI->mayLoad () && !MI->mayStore ());
15951608 bool Changed = false ;
15961609
1597- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1610+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
15981611 switch (Scope) {
15991612 case SIAtomicScope::SYSTEM:
16001613 // Set SC bits to indicate system scope.
@@ -1638,7 +1651,7 @@ bool SIGfx940CacheControl::enableStoreCacheBypass(
16381651 assert (!MI->mayLoad () && MI->mayStore ());
16391652 bool Changed = false ;
16401653
1641- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1654+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
16421655 switch (Scope) {
16431656 case SIAtomicScope::SYSTEM:
16441657 // Set SC bits to indicate system scope.
@@ -1678,7 +1691,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass(
16781691 assert (MI->mayLoad () && MI->mayStore ());
16791692 bool Changed = false ;
16801693
1681- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1694+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
16821695 switch (Scope) {
16831696 case SIAtomicScope::SYSTEM:
16841697 // Set SC1 bit to indicate system scope.
@@ -1756,7 +1769,7 @@ bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
17561769 if (Pos == Position::AFTER)
17571770 ++MI;
17581771
1759- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1772+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
17601773 switch (Scope) {
17611774 case SIAtomicScope::SYSTEM:
17621775 // Ensures that following loads will not see stale remote VMEM data or
@@ -1840,7 +1853,7 @@ bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
18401853 if (Pos == Position::AFTER)
18411854 ++MI;
18421855
1843- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1856+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
18441857 switch (Scope) {
18451858 case SIAtomicScope::SYSTEM:
18461859 // Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
@@ -1897,7 +1910,7 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
18971910 assert (MI->mayLoad () && !MI->mayStore ());
18981911 bool Changed = false ;
18991912
1900- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1913+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
19011914 switch (Scope) {
19021915 case SIAtomicScope::SYSTEM:
19031916 case SIAtomicScope::AGENT:
@@ -2129,7 +2142,7 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
21292142 if (Pos == Position::AFTER)
21302143 ++MI;
21312144
2132- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
2145+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
21332146 switch (Scope) {
21342147 case SIAtomicScope::SYSTEM:
21352148 case SIAtomicScope::AGENT:
@@ -2194,7 +2207,7 @@ bool SIGfx11CacheControl::enableLoadCacheBypass(
21942207 assert (MI->mayLoad () && !MI->mayStore ());
21952208 bool Changed = false ;
21962209
2197- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
2210+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
21982211 switch (Scope) {
21992212 case SIAtomicScope::SYSTEM:
22002213 case SIAtomicScope::AGENT:
@@ -2462,7 +2475,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
24622475 // / memory.
24632476
24642477 // / Other address spaces do not have a cache.
2465- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE )
2478+ if (! canAffectGlobalAddrSpace (AddrSpace) )
24662479 return false ;
24672480
24682481 AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
@@ -2523,7 +2536,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
25232536 // writeback as all memory operations by the same thread are
25242537 // sequentially consistent, and no other thread can access scratch
25252538 // memory.
2526- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
2539+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
25272540 if (Pos == Position::AFTER)
25282541 ++MI;
25292542
@@ -2655,7 +2668,7 @@ bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
26552668 SIAtomicAddrSpace AddrSpace) const {
26562669 bool Changed = false ;
26572670
2658- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
2671+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
26592672 switch (Scope) {
26602673 case SIAtomicScope::SYSTEM:
26612674 Changed |= setScope (MI, AMDGPU::CPol::SCOPE_SYS);
0 commit comments