|
33 | 33 | #include "llvm/IR/IRBuilder.h"
|
34 | 34 | #include "llvm/Support/Casting.h"
|
35 | 35 | #include "llvm/Support/FormatVariadic.h"
|
| 36 | +#include "llvm/Support/NVPTXAddrSpace.h" |
36 | 37 | #include "llvm/Support/raw_ostream.h"
|
37 | 38 | #include <cassert>
|
38 | 39 | #include <optional>
|
@@ -1332,30 +1333,70 @@ LogicalResult NVVM::PrefetchOp::verify() {
|
1332 | 1333 | unsigned addressSpace =
|
1333 | 1334 | llvm::cast<LLVM::LLVMPointerType>(getAddr().getType()).getAddressSpace();
|
1334 | 1335 | std::optional<NVVM::CacheEvictionPriority> evictPriority = getEvictPriority();
|
| 1336 | + std::optional<NVVM::PrefetchCacheLevel> cacheLevel = getCacheLevel(); |
1335 | 1337 |
|
1336 |
| - if (getUniform()) { |
1337 |
| - if (getCacheLevel() != CacheLevel::L1) |
1338 |
| - return emitOpError("unsupported cache level, the only supported uniform " |
1339 |
| - "cache level is L1"); |
| 1338 | + if (getTensormap() && cacheLevel) |
| 1339 | + return emitOpError("cannot specify both tensormap and cache level"); |
1340 | 1340 |
|
1341 |
| - if (addressSpace != MemSpace::kGenericMemorySpace) |
| 1341 | + if (getTensormap()) { |
| 1342 | + if (addressSpace != MemSpace::kGenericMemorySpace && |
| 1343 | + addressSpace != MemSpace::kConstantMemorySpace) { |
1342 | 1344 | return emitOpError(
|
1343 |
| - "prefetch to uniform cache requires a generic pointer"); |
1344 |
| - } |
| 1345 | + "prefetch tensormap requires a generic or constant pointer"); |
| 1346 | + } |
1345 | 1347 |
|
1346 |
| - if (evictPriority) { |
1347 |
| - if (getCacheLevel() != CacheLevel::L2) |
| 1348 | + if (evictPriority) { |
1348 | 1349 | return emitOpError(
|
1349 |
| - "cache eviction priority supported only for cache level L2"); |
1350 |
| - |
1351 |
| - if (addressSpace != MemSpace::kGlobalMemorySpace) |
1352 |
| - return emitOpError("cache eviction priority requires a global pointer"); |
| 1350 | + "prefetch tensormap does not support eviction priority"); |
| 1351 | + } |
1353 | 1352 |
|
1354 |
| - if (*evictPriority != NVVM::CacheEvictionPriority::EvictNormal && |
1355 |
| - *evictPriority != NVVM::CacheEvictionPriority::EvictLast) |
| 1353 | + if (getInParamSpace() && addressSpace != MemSpace::kGenericMemorySpace) { |
1356 | 1354 | return emitOpError(
|
1357 |
| - "unsupported cache eviction priority, only evict_last and " |
1358 |
| - "evict_normal are supported"); |
| 1355 | + "in_param_space can only be specified for a generic pointer"); |
| 1356 | + } |
| 1357 | + |
| 1358 | + } else if (cacheLevel) { |
| 1359 | + if (addressSpace != MemSpace::kGenericMemorySpace && |
| 1360 | + addressSpace != MemSpace::kGlobalMemorySpace && |
| 1361 | + addressSpace != MemSpace::kLocalMemorySpace) { |
| 1362 | + return emitOpError("prefetch to cache level requires a generic, global, " |
| 1363 | + "or local pointer"); |
| 1364 | + } |
| 1365 | + |
| 1366 | + if (getUniform()) { |
| 1367 | + if (*cacheLevel != CacheLevel::L1) { |
| 1368 | + return emitOpError( |
| 1369 | + "unsupported cache level, the only supported uniform " |
| 1370 | + "cache level is L1"); |
| 1371 | + } |
| 1372 | + |
| 1373 | + if (addressSpace != MemSpace::kGenericMemorySpace) { |
| 1374 | + return emitOpError( |
| 1375 | + "prefetch to uniform cache requires a generic pointer"); |
| 1376 | + } |
| 1377 | + } |
| 1378 | + |
| 1379 | + if (evictPriority) { |
| 1380 | + if (*cacheLevel != CacheLevel::L2) |
| 1381 | + return emitOpError( |
| 1382 | + "cache eviction priority supported only for cache level L2"); |
| 1383 | + |
| 1384 | + if (addressSpace != MemSpace::kGlobalMemorySpace) |
| 1385 | + return emitOpError("cache eviction priority requires a global pointer"); |
| 1386 | + |
| 1387 | + if (*evictPriority != NVVM::CacheEvictionPriority::EvictNormal && |
| 1388 | + *evictPriority != NVVM::CacheEvictionPriority::EvictLast) |
| 1389 | + return emitOpError( |
| 1390 | + "unsupported cache eviction priority, only evict_last and " |
| 1391 | + "evict_normal are supported"); |
| 1392 | + } |
| 1393 | + |
| 1394 | + if (getPredicate()) |
| 1395 | + return emitOpError("predicate supported only on prefetch tensormap"); |
| 1396 | + |
| 1397 | + } else { |
| 1398 | + return emitOpError( |
| 1399 | + "requires specification of either cache level or tensormap"); |
1359 | 1400 | }
|
1360 | 1401 |
|
1361 | 1402 | return success();
|
@@ -1964,43 +2005,69 @@ NVVM::IDArgPair DotAccumulate2WayOp::getIntrinsicIDAndArgs(
|
1964 | 2005 | return {ids[type], args};
|
1965 | 2006 | }
|
1966 | 2007 |
|
1967 |
| -llvm::Intrinsic::ID PrefetchOp::getIntrinsicID(NVVM::PrefetchOp &op) { |
/// Returns `addr` address-space-cast to a pointer in the NVPTX param address
/// space (`llvm::NVPTXAS::AddressSpace::ADDRESS_SPACE_PARAM`).
///
/// \param addr    The LLVM IR pointer value to cast.
/// \param builder The IR builder used to emit the cast; its LLVMContext is
///                also used to construct the destination pointer type.
/// \return The value produced by `IRBuilderBase::CreateAddrSpaceCast`.
| 2008 | +static llvm::Value *getParamCastedAddr(llvm::Value *addr, |
| 2009 | + llvm::IRBuilderBase &builder) { |
| 2010 | + return builder.CreateAddrSpaceCast( |
| 2011 | + addr, |
| 2012 | + llvm::PointerType::get(builder.getContext(), |
| 2013 | + llvm::NVPTXAS::AddressSpace::ADDRESS_SPACE_PARAM)); |
| 2014 | +} |
| 2015 | + |
| 2016 | +NVVM::IDArgPair |
| 2017 | +PrefetchOp::getIntrinsicIDAndArgs(NVVM::PrefetchOp &op, |
| 2018 | + LLVM::ModuleTranslation &mt, |
| 2019 | + llvm::IRBuilderBase &builder) { |
1968 | 2020 | using MemSpace = NVVM::NVVMMemorySpace;
|
1969 | 2021 | using CacheLevel = NVVM::PrefetchCacheLevel;
|
1970 | 2022 |
|
1971 |
| - NVVM::PrefetchCacheLevel cacheLevel = op.getCacheLevel(); |
| 2023 | + std::optional<NVVM::PrefetchCacheLevel> cacheLevel = op.getCacheLevel(); |
1972 | 2024 | std::optional<NVVM::CacheEvictionPriority> evictPriority =
|
1973 | 2025 | op.getEvictPriority();
|
1974 | 2026 | unsigned addressSpace =
|
1975 | 2027 | llvm::cast<LLVM::LLVMPointerType>(op.getAddr().getType())
|
1976 | 2028 | .getAddressSpace();
|
1977 | 2029 |
|
1978 |
| - if (op.getUniform() && cacheLevel == CacheLevel::L1) |
1979 |
| - return llvm::Intrinsic::nvvm_prefetchu_L1; |
| 2030 | + llvm::SmallVector<llvm::Value *> args; |
| 2031 | + llvm::Value *addr = mt.lookupValue(op.getAddr()); |
| 2032 | + args.push_back(op.getInParamSpace() ? getParamCastedAddr(addr, builder) |
| 2033 | + : addr); |
| 2034 | + |
| 2035 | + if (op.getTensormap()) |
| 2036 | + return {llvm::Intrinsic::nvvm_prefetch_tensormap, args}; |
| 2037 | + |
| 2038 | + assert(cacheLevel && "expected cache level for non-tensormap prefetch"); |
| 2039 | + |
| 2040 | + if (op.getUniform() && *cacheLevel == CacheLevel::L1) |
| 2041 | + return {llvm::Intrinsic::nvvm_prefetchu_L1, args}; |
1980 | 2042 |
|
1981 |
| - if (evictPriority && cacheLevel == CacheLevel::L2) { |
| 2043 | + if (evictPriority && *cacheLevel == CacheLevel::L2) { |
1982 | 2044 | switch (*evictPriority) {
|
1983 | 2045 | case NVVM::CacheEvictionPriority::EvictLast:
|
1984 |
| - return llvm::Intrinsic::nvvm_prefetch_global_L2_evict_last; |
| 2046 | + return {llvm::Intrinsic::nvvm_prefetch_global_L2_evict_last, args}; |
1985 | 2047 | case NVVM::CacheEvictionPriority::EvictNormal:
|
1986 |
| - return llvm::Intrinsic::nvvm_prefetch_global_L2_evict_normal; |
| 2048 | + return {llvm::Intrinsic::nvvm_prefetch_global_L2_evict_normal, args}; |
1987 | 2049 | default:
|
1988 | 2050 | llvm_unreachable("Invalid cache eviction priority");
|
1989 | 2051 | }
|
1990 | 2052 | }
|
1991 | 2053 |
|
1992 | 2054 | switch (addressSpace) {
|
1993 | 2055 | case MemSpace::kGenericMemorySpace:
|
1994 |
| - return cacheLevel == CacheLevel::L1 ? llvm::Intrinsic::nvvm_prefetch_L1 |
1995 |
| - : llvm::Intrinsic::nvvm_prefetch_L2; |
| 2056 | + return *cacheLevel == CacheLevel::L1 |
| 2057 | + ? NVVM::IDArgPair({llvm::Intrinsic::nvvm_prefetch_L1, args}) |
| 2058 | + : NVVM::IDArgPair({llvm::Intrinsic::nvvm_prefetch_L2, args}); |
1996 | 2059 | case MemSpace::kGlobalMemorySpace:
|
1997 |
| - return cacheLevel == CacheLevel::L1 |
1998 |
| - ? llvm::Intrinsic::nvvm_prefetch_global_L1 |
1999 |
| - : llvm::Intrinsic::nvvm_prefetch_global_L2; |
| 2060 | + return *cacheLevel == CacheLevel::L1 |
| 2061 | + ? NVVM::IDArgPair( |
| 2062 | + {llvm::Intrinsic::nvvm_prefetch_global_L1, args}) |
| 2063 | + : NVVM::IDArgPair( |
| 2064 | + {llvm::Intrinsic::nvvm_prefetch_global_L2, args}); |
2000 | 2065 | case MemSpace::kLocalMemorySpace:
|
2001 |
| - return cacheLevel == CacheLevel::L1 |
2002 |
| - ? llvm::Intrinsic::nvvm_prefetch_local_L1 |
2003 |
| - : llvm::Intrinsic::nvvm_prefetch_local_L2; |
| 2066 | + return *cacheLevel == CacheLevel::L1 |
| 2067 | + ? NVVM::IDArgPair( |
| 2068 | + {llvm::Intrinsic::nvvm_prefetch_local_L1, args}) |
| 2069 | + : NVVM::IDArgPair( |
| 2070 | + {llvm::Intrinsic::nvvm_prefetch_local_L2, args}); |
2004 | 2071 | default:
|
2005 | 2072 | llvm_unreachable("Invalid pointer address space");
|
2006 | 2073 | }
|
|
0 commit comments