@@ -299,6 +299,10 @@ class SICacheControl {
299
299
bool enableNamedBit (const MachineBasicBlock::iterator MI,
300
300
AMDGPU::CPol::CPol Bit) const ;
301
301
302
+ /// Check if any atomic operation on \p AS can affect memory accessible via the
303
+ /// global address space.
304
+ bool canAffectGlobalAddrSpace (SIAtomicAddrSpace AS) const ;
305
+
302
306
public:
303
307
304
308
/// Create a cache control for the subtarget \p ST.
@@ -991,6 +995,15 @@ bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
991
995
return true ;
992
996
}
993
997
998
+ bool SICacheControl::canAffectGlobalAddrSpace (SIAtomicAddrSpace AS) const {
999
+ assert ((!ST.hasGloballyAddressableScratch () ||
1000
+ (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ||
1001
+ (AS & SIAtomicAddrSpace::SCRATCH) == SIAtomicAddrSpace::NONE) &&
1002
+ " scratch instructions should already be replaced by flat "
1003
+ " instructions if GloballyAddressableScratch is enabled" );
1004
+ return (AS & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE;
1005
+ }
1006
+
994
1007
/* static */
995
1008
std::unique_ptr<SICacheControl> SICacheControl::create (const GCNSubtarget &ST) {
996
1009
GCNSubtarget::Generation Generation = ST.getGeneration ();
@@ -1016,7 +1029,7 @@ bool SIGfx6CacheControl::enableLoadCacheBypass(
1016
1029
assert (MI->mayLoad () && !MI->mayStore ());
1017
1030
bool Changed = false ;
1018
1031
1019
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1032
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1020
1033
switch (Scope) {
1021
1034
case SIAtomicScope::SYSTEM:
1022
1035
case SIAtomicScope::AGENT:
@@ -1239,7 +1252,7 @@ bool SIGfx6CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
1239
1252
if (Pos == Position::AFTER)
1240
1253
++MI;
1241
1254
1242
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1255
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1243
1256
switch (Scope) {
1244
1257
case SIAtomicScope::SYSTEM:
1245
1258
case SIAtomicScope::AGENT:
@@ -1299,7 +1312,7 @@ bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
1299
1312
if (Pos == Position::AFTER)
1300
1313
++MI;
1301
1314
1302
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1315
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1303
1316
switch (Scope) {
1304
1317
case SIAtomicScope::SYSTEM:
1305
1318
case SIAtomicScope::AGENT:
@@ -1336,7 +1349,7 @@ bool SIGfx90ACacheControl::enableLoadCacheBypass(
1336
1349
assert (MI->mayLoad () && !MI->mayStore ());
1337
1350
bool Changed = false ;
1338
1351
1339
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1352
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1340
1353
switch (Scope) {
1341
1354
case SIAtomicScope::SYSTEM:
1342
1355
case SIAtomicScope::AGENT:
@@ -1378,7 +1391,7 @@ bool SIGfx90ACacheControl::enableRMWCacheBypass(
1378
1391
assert (MI->mayLoad () && MI->mayStore ());
1379
1392
bool Changed = false ;
1380
1393
1381
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1394
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1382
1395
switch (Scope) {
1383
1396
case SIAtomicScope::SYSTEM:
1384
1397
case SIAtomicScope::AGENT:
@@ -1487,7 +1500,7 @@ bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
1487
1500
if (Pos == Position::AFTER)
1488
1501
++MI;
1489
1502
1490
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1503
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1491
1504
switch (Scope) {
1492
1505
case SIAtomicScope::SYSTEM:
1493
1506
// Ensures that following loads will not see stale remote VMEM data or
@@ -1551,7 +1564,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
1551
1564
if (Pos == Position::AFTER)
1552
1565
++MI;
1553
1566
1554
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1567
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1555
1568
switch (Scope) {
1556
1569
case SIAtomicScope::SYSTEM:
1557
1570
// Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
@@ -1594,7 +1607,7 @@ bool SIGfx940CacheControl::enableLoadCacheBypass(
1594
1607
assert (MI->mayLoad () && !MI->mayStore ());
1595
1608
bool Changed = false ;
1596
1609
1597
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1610
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1598
1611
switch (Scope) {
1599
1612
case SIAtomicScope::SYSTEM:
1600
1613
// Set SC bits to indicate system scope.
@@ -1638,7 +1651,7 @@ bool SIGfx940CacheControl::enableStoreCacheBypass(
1638
1651
assert (!MI->mayLoad () && MI->mayStore ());
1639
1652
bool Changed = false ;
1640
1653
1641
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1654
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1642
1655
switch (Scope) {
1643
1656
case SIAtomicScope::SYSTEM:
1644
1657
// Set SC bits to indicate system scope.
@@ -1678,7 +1691,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass(
1678
1691
assert (MI->mayLoad () && MI->mayStore ());
1679
1692
bool Changed = false ;
1680
1693
1681
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1694
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1682
1695
switch (Scope) {
1683
1696
case SIAtomicScope::SYSTEM:
1684
1697
// Set SC1 bit to indicate system scope.
@@ -1756,7 +1769,7 @@ bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
1756
1769
if (Pos == Position::AFTER)
1757
1770
++MI;
1758
1771
1759
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1772
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1760
1773
switch (Scope) {
1761
1774
case SIAtomicScope::SYSTEM:
1762
1775
// Ensures that following loads will not see stale remote VMEM data or
@@ -1840,7 +1853,7 @@ bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
1840
1853
if (Pos == Position::AFTER)
1841
1854
++MI;
1842
1855
1843
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1856
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1844
1857
switch (Scope) {
1845
1858
case SIAtomicScope::SYSTEM:
1846
1859
// Inserting a "S_WAITCNT vmcnt(0)" before is not required because the
@@ -1897,7 +1910,7 @@ bool SIGfx10CacheControl::enableLoadCacheBypass(
1897
1910
assert (MI->mayLoad () && !MI->mayStore ());
1898
1911
bool Changed = false ;
1899
1912
1900
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
1913
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
1901
1914
switch (Scope) {
1902
1915
case SIAtomicScope::SYSTEM:
1903
1916
case SIAtomicScope::AGENT:
@@ -2129,7 +2142,7 @@ bool SIGfx10CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
2129
2142
if (Pos == Position::AFTER)
2130
2143
++MI;
2131
2144
2132
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
2145
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
2133
2146
switch (Scope) {
2134
2147
case SIAtomicScope::SYSTEM:
2135
2148
case SIAtomicScope::AGENT:
@@ -2194,7 +2207,7 @@ bool SIGfx11CacheControl::enableLoadCacheBypass(
2194
2207
assert (MI->mayLoad () && !MI->mayStore ());
2195
2208
bool Changed = false ;
2196
2209
2197
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
2210
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
2198
2211
switch (Scope) {
2199
2212
case SIAtomicScope::SYSTEM:
2200
2213
case SIAtomicScope::AGENT:
@@ -2462,7 +2475,7 @@ bool SIGfx12CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
2462
2475
/// memory.
2463
2476
2464
2477
/// Other address spaces do not have a cache.
2465
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) == SIAtomicAddrSpace::NONE )
2478
+ if (! canAffectGlobalAddrSpace (AddrSpace) )
2466
2479
return false ;
2467
2480
2468
2481
AMDGPU::CPol::CPol ScopeImm = AMDGPU::CPol::SCOPE_DEV;
@@ -2523,7 +2536,7 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
2523
2536
// writeback as all memory operations by the same thread are
2524
2537
// sequentially consistent, and no other thread can access scratch
2525
2538
// memory.
2526
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
2539
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
2527
2540
if (Pos == Position::AFTER)
2528
2541
++MI;
2529
2542
@@ -2655,7 +2668,7 @@ bool SIGfx12CacheControl::setAtomicScope(const MachineBasicBlock::iterator &MI,
2655
2668
SIAtomicAddrSpace AddrSpace) const {
2656
2669
bool Changed = false ;
2657
2670
2658
- if ((AddrSpace & SIAtomicAddrSpace::GLOBAL) != SIAtomicAddrSpace::NONE ) {
2671
+ if (canAffectGlobalAddrSpace (AddrSpace) ) {
2659
2672
switch (Scope) {
2660
2673
case SIAtomicScope::SYSTEM:
2661
2674
Changed |= setScope (MI, AMDGPU::CPol::SCOPE_SYS);
0 commit comments