|
25 | 25 | #include "llvm/IR/DiagnosticInfo.h" |
26 | 26 | #include "llvm/IR/MemoryModelRelaxationAnnotations.h" |
27 | 27 | #include "llvm/IR/PassManager.h" |
| 28 | +#include "llvm/Support/AMDGPUAddrSpace.h" |
28 | 29 | #include "llvm/Support/AtomicOrdering.h" |
29 | 30 | #include "llvm/TargetParser/TargetParser.h" |
30 | 31 |
|
@@ -277,6 +278,12 @@ class SIMemOpAccess final { |
277 | 278 | /// rmw operation, "std::nullopt" otherwise. |
278 | 279 | std::optional<SIMemOpInfo> |
279 | 280 | getAtomicCmpxchgOrRmwInfo(const MachineBasicBlock::iterator &MI) const; |
| 281 | + |
| 282 | + /// \returns DMA to LDS info if \p MI is a direct-to/from-LDS load/store, |
| 283 | + /// along with an indication of whether this is a load or store. If it is not |
| 284 | + /// a direct-to-LDS operation, returns std::nullopt. |
| 285 | + std::optional<SIMemOpInfo> |
| 286 | + getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const; |
280 | 287 | }; |
281 | 288 |
|
282 | 289 | class SICacheControl { |
@@ -703,6 +710,9 @@ class SIMemoryLegalizer final { |
703 | 710 | /// instructions are added/deleted or \p MI is modified, false otherwise. |
704 | 711 | bool expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI, |
705 | 712 | MachineBasicBlock::iterator &MI); |
| 713 | + /// Expands LDS DMA operation \p MI. Returns true if instructions are |
| 714 | + /// added/deleted or \p MI is modified, false otherwise. |
| 715 | + bool expandLDSDMA(const SIMemOpInfo &MOI, MachineBasicBlock::iterator &MI); |
706 | 716 |
|
707 | 717 | public: |
708 | 718 | SIMemoryLegalizer(const MachineModuleInfo &MMI) : MMI(MMI) {}; |
@@ -832,6 +842,9 @@ SIAtomicAddrSpace SIMemOpAccess::toSIAtomicAddrSpace(unsigned AS) const { |
832 | 842 | return SIAtomicAddrSpace::SCRATCH; |
833 | 843 | if (AS == AMDGPUAS::REGION_ADDRESS) |
834 | 844 | return SIAtomicAddrSpace::GDS; |
| 845 | + if (AS == AMDGPUAS::BUFFER_FAT_POINTER || AS == AMDGPUAS::BUFFER_RESOURCE || |
| 846 | + AS == AMDGPUAS::BUFFER_STRIDED_POINTER) |
| 847 | + return SIAtomicAddrSpace::GLOBAL; |
835 | 848 |
|
836 | 849 | return SIAtomicAddrSpace::OTHER; |
837 | 850 | } |
@@ -987,6 +1000,16 @@ std::optional<SIMemOpInfo> SIMemOpAccess::getAtomicCmpxchgOrRmwInfo( |
987 | 1000 | return constructFromMIWithMMO(MI); |
988 | 1001 | } |
989 | 1002 |
|
| 1003 | +std::optional<SIMemOpInfo> |
| 1004 | +SIMemOpAccess::getLDSDMAInfo(const MachineBasicBlock::iterator &MI) const { |
| 1005 | + assert(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic); |
| 1006 | + |
| 1007 | + if (!SIInstrInfo::isLDSDMA(*MI)) |
| 1008 | + return std::nullopt; |
| 1009 | + |
| 1010 | + return constructFromMIWithMMO(MI); |
| 1011 | +} |
| 1012 | + |
990 | 1013 | SICacheControl::SICacheControl(const GCNSubtarget &ST) : ST(ST) { |
991 | 1014 | TII = ST.getInstrInfo(); |
992 | 1015 | IV = getIsaVersion(ST.getCPU()); |
@@ -1099,7 +1122,7 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal( |
1099 | 1122 | // Only handle load and store, not atomic read-modify-write instructions. The |
1100 | 1123 | // latter use glc to indicate if the atomic returns a result and so must not |
1101 | 1124 | // be used for cache control. |
1102 | | - assert(MI->mayLoad() ^ MI->mayStore()); |
| 1125 | + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); |
1103 | 1126 |
|
1104 | 1127 | // Only update load and store, not LLVM IR atomic read-modify-write |
1105 | 1128 | // instructions. The latter are always marked as volatile so cannot sensibly |
@@ -1429,7 +1452,7 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal( |
1429 | 1452 | // Only handle load and store, not atomic read-modify-write instructions. The |
1430 | 1453 | // latter use glc to indicate if the atomic returns a result and so must not |
1431 | 1454 | // be used for cache control. |
1432 | | - assert(MI->mayLoad() ^ MI->mayStore()); |
| 1455 | + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); |
1433 | 1456 |
|
1434 | 1457 | // Only update load and store, not LLVM IR atomic read-modify-write |
1435 | 1458 | // instructions. The latter are always marked as volatile so cannot sensibly |
@@ -1733,7 +1756,7 @@ bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal( |
1733 | 1756 | // Only handle load and store, not atomic read-modify-write instructions. The |
1734 | 1757 | // latter use glc to indicate if the atomic returns a result and so must not |
1735 | 1758 | // be used for cache control. |
1736 | | - assert(MI->mayLoad() ^ MI->mayStore()); |
| 1759 | + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); |
1737 | 1760 |
|
1738 | 1761 | // Only update load and store, not LLVM IR atomic read-modify-write |
1739 | 1762 | // instructions. The latter are always marked as volatile so cannot sensibly |
@@ -1968,7 +1991,7 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal( |
1968 | 1991 | // Only handle load and store, not atomic read-modify-write instructions. The |
1969 | 1992 | // latter use glc to indicate if the atomic returns a result and so must not |
1970 | 1993 | // be used for cache control. |
1971 | | - assert(MI->mayLoad() ^ MI->mayStore()); |
| 1994 | + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); |
1972 | 1995 |
|
1973 | 1996 | // Only update load and store, not LLVM IR atomic read-modify-write |
1974 | 1997 | // instructions. The latter are always marked as volatile so cannot sensibly |
@@ -2266,7 +2289,7 @@ bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal( |
2266 | 2289 | // Only handle load and store, not atomic read-modify-write instructions. The |
2267 | 2290 | // latter use glc to indicate if the atomic returns a result and so must not |
2268 | 2291 | // be used for cache control. |
2269 | | - assert(MI->mayLoad() ^ MI->mayStore()); |
| 2292 | + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); |
2270 | 2293 |
|
2271 | 2294 | // Only update load and store, not LLVM IR atomic read-modify-write |
2272 | 2295 | // instructions. The latter are always marked as volatile so cannot sensibly |
@@ -2611,7 +2634,7 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal( |
2611 | 2634 | bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const { |
2612 | 2635 |
|
2613 | 2636 | // Only handle load and store, not atomic read-modify-write instructions. |
2614 | | - assert(MI->mayLoad() ^ MI->mayStore()); |
| 2637 | + assert((MI->mayLoad() ^ MI->mayStore()) || SIInstrInfo::isLDSDMA(*MI)); |
2615 | 2638 |
|
2616 | 2639 | // Only update load and store, not LLVM IR atomic read-modify-write |
2617 | 2640 | // instructions. The latter are always marked as volatile so cannot sensibly |
@@ -2934,6 +2957,23 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI, |
2934 | 2957 | return Changed; |
2935 | 2958 | } |
2936 | 2959 |
|
| 2960 | +bool SIMemoryLegalizer::expandLDSDMA(const SIMemOpInfo &MOI, |
| 2961 | + MachineBasicBlock::iterator &MI) { |
| 2962 | + assert(MI->mayLoad() && MI->mayStore()); |
| 2963 | + |
| 2964 | + // The volatility or nontemporal-ness of the operation is a |
| 2965 | + // function of the global memory, not the LDS. |
| 2966 | + SIMemOp OpKind = |
| 2967 | + SIInstrInfo::mayWriteLDSThroughDMA(*MI) ? SIMemOp::LOAD : SIMemOp::STORE; |
| 2968 | + |
| 2969 | + // Handle volatile and/or nontemporal markers on direct-to-LDS loads and |
| 2970 | + // stores. The operation is treated as a volatile/nontemporal load if it |
| 2971 | + // writes LDS through DMA and as a store otherwise (see OpKind above). |
| 2972 | + return CC->enableVolatileAndOrNonTemporal( |
| 2973 | + MI, MOI.getInstrAddrSpace(), OpKind, MOI.isVolatile(), |
| 2974 | + MOI.isNonTemporal(), MOI.isLastUse()); |
| 2975 | +} |
| 2976 | + |
2937 | 2977 | bool SIMemoryLegalizerLegacy::runOnMachineFunction(MachineFunction &MF) { |
2938 | 2978 | const MachineModuleInfo &MMI = |
2939 | 2979 | getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); |
@@ -2985,14 +3025,17 @@ bool SIMemoryLegalizer::run(MachineFunction &MF) { |
2985 | 3025 | if (!(MI->getDesc().TSFlags & SIInstrFlags::maybeAtomic)) |
2986 | 3026 | continue; |
2987 | 3027 |
|
2988 | | - if (const auto &MOI = MOA.getLoadInfo(MI)) |
| 3028 | + if (const auto &MOI = MOA.getLoadInfo(MI)) { |
2989 | 3029 | Changed |= expandLoad(*MOI, MI); |
2990 | | - else if (const auto &MOI = MOA.getStoreInfo(MI)) { |
| 3030 | + } else if (const auto &MOI = MOA.getStoreInfo(MI)) { |
2991 | 3031 | Changed |= expandStore(*MOI, MI); |
2992 | | - } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) |
| 3032 | + } else if (const auto &MOI = MOA.getLDSDMAInfo(MI)) { |
| 3033 | + Changed |= expandLDSDMA(*MOI, MI); |
| 3034 | + } else if (const auto &MOI = MOA.getAtomicFenceInfo(MI)) { |
2993 | 3035 | Changed |= expandAtomicFence(*MOI, MI); |
2994 | | - else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) |
| 3036 | + } else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI)) { |
2995 | 3037 | Changed |= expandAtomicCmpxchgOrRmw(*MOI, MI); |
| 3038 | + } |
2996 | 3039 | } |
2997 | 3040 | } |
2998 | 3041 |
|
|
0 commit comments