|
33 | 33 | #include "llvm/IR/IRBuilder.h"
|
34 | 34 | #include "llvm/Support/Casting.h"
|
35 | 35 | #include "llvm/Support/FormatVariadic.h"
|
| 36 | +#include "llvm/Support/NVPTXAddrSpace.h" |
36 | 37 | #include "llvm/Support/raw_ostream.h"
|
37 | 38 | #include <cassert>
|
38 | 39 | #include <optional>
|
@@ -1236,30 +1237,70 @@ LogicalResult NVVM::PrefetchOp::verify() {
|
1236 | 1237 | unsigned addressSpace =
|
1237 | 1238 | llvm::cast<LLVM::LLVMPointerType>(getAddr().getType()).getAddressSpace();
|
1238 | 1239 | std::optional<NVVM::CacheEvictionPriority> evictPriority = getEvictPriority();
|
| 1240 | + std::optional<NVVM::PrefetchCacheLevel> cacheLevel = getCacheLevel(); |
1239 | 1241 |
|
1240 |
| - if (getUniform()) { |
1241 |
| - if (getCacheLevel() != CacheLevel::L1) |
1242 |
| - return emitOpError("unsupported cache level, the only supported uniform " |
1243 |
| - "cache level is L1"); |
| 1242 | + if (getTensormap() && cacheLevel) |
| 1243 | + return emitOpError("cannot specify both tensormap and cache level"); |
1244 | 1244 |
|
1245 |
| - if (addressSpace != MemSpace::kGenericMemorySpace) |
| 1245 | + if (getTensormap()) { |
| 1246 | + if (addressSpace != MemSpace::kGenericMemorySpace && |
| 1247 | + addressSpace != MemSpace::kConstantMemorySpace) { |
1246 | 1248 | return emitOpError(
|
1247 |
| - "prefetch to uniform cache requires a generic pointer"); |
1248 |
| - } |
| 1249 | + "prefetch tensormap requires a generic or constant pointer"); |
| 1250 | + } |
1249 | 1251 |
|
1250 |
| - if (evictPriority) { |
1251 |
| - if (getCacheLevel() != CacheLevel::L2) |
| 1252 | + if (evictPriority) { |
1252 | 1253 | return emitOpError(
|
1253 |
| - "cache eviction priority supported only for cache level L2"); |
1254 |
| - |
1255 |
| - if (addressSpace != MemSpace::kGlobalMemorySpace) |
1256 |
| - return emitOpError("cache eviction priority requires a global pointer"); |
| 1254 | + "prefetch tensormap does not support eviction priority"); |
| 1255 | + } |
1257 | 1256 |
|
1258 |
| - if (*evictPriority != NVVM::CacheEvictionPriority::EvictNormal && |
1259 |
| - *evictPriority != NVVM::CacheEvictionPriority::EvictLast) |
| 1257 | + if (getInParamSpace() && addressSpace != MemSpace::kGenericMemorySpace) { |
1260 | 1258 | return emitOpError(
|
1261 |
| - "unsupported cache eviction priority, only evict_last and " |
1262 |
| - "evict_normal are supported"); |
| 1259 | + "in_param_space can only be specified for a generic pointer"); |
| 1260 | + } |
| 1261 | + |
| 1262 | + } else if (cacheLevel) { |
| 1263 | + if (addressSpace != MemSpace::kGenericMemorySpace && |
| 1264 | + addressSpace != MemSpace::kGlobalMemorySpace && |
| 1265 | + addressSpace != MemSpace::kLocalMemorySpace) { |
| 1266 | + return emitOpError("prefetch to cache level requires a generic, global, " |
| 1267 | + "or local pointer"); |
| 1268 | + } |
| 1269 | + |
| 1270 | + if (getUniform()) { |
| 1271 | + if (*cacheLevel != CacheLevel::L1) { |
| 1272 | + return emitOpError( |
| 1273 | + "unsupported cache level, the only supported uniform " |
| 1274 | + "cache level is L1"); |
| 1275 | + } |
| 1276 | + |
| 1277 | + if (addressSpace != MemSpace::kGenericMemorySpace) { |
| 1278 | + return emitOpError( |
| 1279 | + "prefetch to uniform cache requires a generic pointer"); |
| 1280 | + } |
| 1281 | + } |
| 1282 | + |
| 1283 | + if (evictPriority) { |
| 1284 | + if (*cacheLevel != CacheLevel::L2) |
| 1285 | + return emitOpError( |
| 1286 | + "cache eviction priority supported only for cache level L2"); |
| 1287 | + |
| 1288 | + if (addressSpace != MemSpace::kGlobalMemorySpace) |
| 1289 | + return emitOpError("cache eviction priority requires a global pointer"); |
| 1290 | + |
| 1291 | + if (*evictPriority != NVVM::CacheEvictionPriority::EvictNormal && |
| 1292 | + *evictPriority != NVVM::CacheEvictionPriority::EvictLast) |
| 1293 | + return emitOpError( |
| 1294 | + "unsupported cache eviction priority, only evict_last and " |
| 1295 | + "evict_normal are supported"); |
| 1296 | + } |
| 1297 | + |
| 1298 | + if (getPredicate()) |
| 1299 | + return emitOpError("predicate supported only on prefetch tensormap"); |
| 1300 | + |
| 1301 | + } else { |
| 1302 | + return emitOpError( |
| 1303 | + "requires specification of either cache level or tensormap"); |
1263 | 1304 | }
|
1264 | 1305 |
|
1265 | 1306 | return success();
|
@@ -1794,43 +1835,69 @@ NVVM::IDArgPair DotAccumulate2WayOp::getIntrinsicIDAndArgs(
|
1794 | 1835 | return {ids[type], args};
|
1795 | 1836 | }
|
1796 | 1837 |
|
1797 |
| -llvm::Intrinsic::ID PrefetchOp::getIntrinsicID(NVVM::PrefetchOp &op) { |
| 1838 | +static llvm::Value *getParamCastedAddr(llvm::Value *addr, |
| 1839 | + llvm::IRBuilderBase &builder) { |
| 1840 | + return builder.CreateAddrSpaceCast( |
| 1841 | + addr, |
| 1842 | + llvm::PointerType::get(builder.getContext(), |
| 1843 | + llvm::NVPTXAS::AddressSpace::ADDRESS_SPACE_PARAM)); |
| 1844 | +} |
| 1845 | + |
| 1846 | +NVVM::IDArgPair |
| 1847 | +PrefetchOp::getIntrinsicIDAndArgs(NVVM::PrefetchOp &op, |
| 1848 | + LLVM::ModuleTranslation &mt, |
| 1849 | + llvm::IRBuilderBase &builder) { |
1798 | 1850 | using MemSpace = NVVM::NVVMMemorySpace;
|
1799 | 1851 | using CacheLevel = NVVM::PrefetchCacheLevel;
|
1800 | 1852 |
|
1801 |
| - NVVM::PrefetchCacheLevel cacheLevel = op.getCacheLevel(); |
| 1853 | + std::optional<NVVM::PrefetchCacheLevel> cacheLevel = op.getCacheLevel(); |
1802 | 1854 | std::optional<NVVM::CacheEvictionPriority> evictPriority =
|
1803 | 1855 | op.getEvictPriority();
|
1804 | 1856 | unsigned addressSpace =
|
1805 | 1857 | llvm::cast<LLVM::LLVMPointerType>(op.getAddr().getType())
|
1806 | 1858 | .getAddressSpace();
|
1807 | 1859 |
|
1808 |
| - if (op.getUniform() && cacheLevel == CacheLevel::L1) |
1809 |
| - return llvm::Intrinsic::nvvm_prefetchu_L1; |
| 1860 | + llvm::SmallVector<llvm::Value *> args; |
| 1861 | + llvm::Value *addr = mt.lookupValue(op.getAddr()); |
| 1862 | + args.push_back(op.getInParamSpace() ? getParamCastedAddr(addr, builder) |
| 1863 | + : addr); |
| 1864 | + |
| 1865 | + if (op.getTensormap()) |
| 1866 | + return {llvm::Intrinsic::nvvm_prefetch_tensormap, args}; |
| 1867 | + |
| 1868 | + assert(cacheLevel && "expected cache level for non-tensormap prefetch"); |
| 1869 | + |
| 1870 | + if (op.getUniform() && *cacheLevel == CacheLevel::L1) |
| 1871 | + return {llvm::Intrinsic::nvvm_prefetchu_L1, args}; |
1810 | 1872 |
|
1811 |
| - if (evictPriority && cacheLevel == CacheLevel::L2) { |
| 1873 | + if (evictPriority && *cacheLevel == CacheLevel::L2) { |
1812 | 1874 | switch (*evictPriority) {
|
1813 | 1875 | case NVVM::CacheEvictionPriority::EvictLast:
|
1814 |
| - return llvm::Intrinsic::nvvm_prefetch_global_L2_evict_last; |
| 1876 | + return {llvm::Intrinsic::nvvm_prefetch_global_L2_evict_last, args}; |
1815 | 1877 | case NVVM::CacheEvictionPriority::EvictNormal:
|
1816 |
| - return llvm::Intrinsic::nvvm_prefetch_global_L2_evict_normal; |
| 1878 | + return {llvm::Intrinsic::nvvm_prefetch_global_L2_evict_normal, args}; |
1817 | 1879 | default:
|
1818 | 1880 | llvm_unreachable("Invalid cache eviction priority");
|
1819 | 1881 | }
|
1820 | 1882 | }
|
1821 | 1883 |
|
1822 | 1884 | switch (addressSpace) {
|
1823 | 1885 | case MemSpace::kGenericMemorySpace:
|
1824 |
| - return cacheLevel == CacheLevel::L1 ? llvm::Intrinsic::nvvm_prefetch_L1 |
1825 |
| - : llvm::Intrinsic::nvvm_prefetch_L2; |
| 1886 | + return *cacheLevel == CacheLevel::L1 |
| 1887 | + ? NVVM::IDArgPair({llvm::Intrinsic::nvvm_prefetch_L1, args}) |
| 1888 | + : NVVM::IDArgPair({llvm::Intrinsic::nvvm_prefetch_L2, args}); |
1826 | 1889 | case MemSpace::kGlobalMemorySpace:
|
1827 |
| - return cacheLevel == CacheLevel::L1 |
1828 |
| - ? llvm::Intrinsic::nvvm_prefetch_global_L1 |
1829 |
| - : llvm::Intrinsic::nvvm_prefetch_global_L2; |
| 1890 | + return *cacheLevel == CacheLevel::L1 |
| 1891 | + ? NVVM::IDArgPair( |
| 1892 | + {llvm::Intrinsic::nvvm_prefetch_global_L1, args}) |
| 1893 | + : NVVM::IDArgPair( |
| 1894 | + {llvm::Intrinsic::nvvm_prefetch_global_L2, args}); |
1830 | 1895 | case MemSpace::kLocalMemorySpace:
|
1831 |
| - return cacheLevel == CacheLevel::L1 |
1832 |
| - ? llvm::Intrinsic::nvvm_prefetch_local_L1 |
1833 |
| - : llvm::Intrinsic::nvvm_prefetch_local_L2; |
| 1896 | + return *cacheLevel == CacheLevel::L1 |
| 1897 | + ? NVVM::IDArgPair( |
| 1898 | + {llvm::Intrinsic::nvvm_prefetch_local_L1, args}) |
| 1899 | + : NVVM::IDArgPair( |
| 1900 | + {llvm::Intrinsic::nvvm_prefetch_local_L2, args}); |
1834 | 1901 | default:
|
1835 | 1902 | llvm_unreachable("Invalid pointer address space");
|
1836 | 1903 | }
|
|
0 commit comments