|
13 | 13 |
|
14 | 14 | #include "AMDGPU.h" |
15 | 15 | #include "GCNSubtarget.h" |
| 16 | +#include "llvm/Analysis/AliasAnalysis.h" |
| 17 | +#include "llvm/Analysis/CaptureTracking.h" |
| 18 | +#include "llvm/Analysis/ScopedNoAliasAA.h" |
16 | 19 | #include "llvm/Analysis/ValueTracking.h" |
17 | 20 | #include "llvm/CodeGen/TargetPassConfig.h" |
| 21 | +#include "llvm/IR/Argument.h" |
18 | 22 | #include "llvm/IR/Attributes.h" |
| 23 | +#include "llvm/IR/Dominators.h" |
19 | 24 | #include "llvm/IR/IRBuilder.h" |
| 25 | +#include "llvm/IR/InstIterator.h" |
20 | 26 | #include "llvm/IR/IntrinsicsAMDGPU.h" |
| 27 | +#include "llvm/IR/LLVMContext.h" |
21 | 28 | #include "llvm/IR/MDBuilder.h" |
22 | 29 | #include "llvm/Target/TargetMachine.h" |
| 30 | +#include <string> |
23 | 31 |
|
24 | 32 | #define DEBUG_TYPE "amdgpu-lower-kernel-arguments" |
25 | 33 |
|
@@ -58,6 +66,145 @@ static BasicBlock::iterator getInsertPt(BasicBlock &BB) { |
58 | 66 | return InsPt; |
59 | 67 | } |
60 | 68 |
|
| 69 | +static void addAliasScopeMetadata(Function &F, DataLayout const &DL) { |
| 70 | + // Collect noalias arguments. |
| 71 | + auto NoAliasArgs = SmallVector<Argument const *, 4u>(); |
| 72 | + |
| 73 | + for (auto &Arg : F.args()) |
| 74 | + if (Arg.hasNoAliasAttr() && !Arg.use_empty()) |
| 75 | + NoAliasArgs.push_back(&Arg); |
| 76 | + |
| 77 | + if (NoAliasArgs.empty()) |
| 78 | + return; |
| 79 | + |
| 80 | + // Add alias scopes for each noalias argument. |
| 81 | + auto MDB = MDBuilder(F.getContext()); |
| 82 | + auto NewScopes = DenseMap<Argument const *, MDNode *>(); |
| 83 | + auto *NewDomain = MDB.createAnonymousAliasScopeDomain(F.getName()); |
| 84 | + |
| 85 | + for (auto I = 0u; I < NoAliasArgs.size(); ++I) { |
| 86 | + auto *Arg = NoAliasArgs[I]; |
| 87 | + auto Name = std::string(F.getName()); |
| 88 | + |
| 89 | + if (Arg->hasName()) |
| 90 | + Name += std::string(": %") + std::string(Arg->getName()); |
| 91 | + else |
| 92 | + Name += std::string(": argument ") + std::to_string(I); |
| 93 | + |
| 94 | + auto *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name); |
| 95 | + NewScopes.insert(std::make_pair(Arg, NewScope)); |
| 96 | + } |
| 97 | + |
| 98 | + // Iterate over all instructions. |
| 99 | + auto DT = DominatorTree(); |
| 100 | + DT.recalculate(F); |
| 101 | + |
| 102 | + for (auto Inst = inst_begin(F); Inst != inst_end(F); ++Inst) { |
| 103 | + // If instruction accesses memory, collect its pointer arguments. |
| 104 | + auto *I = &(*Inst); |
| 105 | + auto IsFuncCall = false; |
| 106 | + auto PtrArgs = SmallVector<Value const *, 2u>(); |
| 107 | + |
| 108 | + if (auto *LI = dyn_cast<LoadInst>(I)) |
| 109 | + PtrArgs.push_back(LI->getPointerOperand()); |
| 110 | + else if (auto *SI = dyn_cast<StoreInst>(I)) |
| 111 | + PtrArgs.push_back(SI->getPointerOperand()); |
| 112 | + else if (auto *VAAI = dyn_cast<VAArgInst>(I)) |
| 113 | + PtrArgs.push_back(VAAI->getPointerOperand()); |
| 114 | + else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(I)) |
| 115 | + PtrArgs.push_back(CXI->getPointerOperand()); |
| 116 | + else if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) |
| 117 | + PtrArgs.push_back(RMWI->getPointerOperand()); |
| 118 | + else if (auto *Call = dyn_cast<CallBase>(I)) { |
| 119 | + if (Call->doesNotAccessMemory()) |
| 120 | + continue; |
| 121 | + |
| 122 | + IsFuncCall = true; |
| 123 | + |
| 124 | + for (auto &Arg : Call->args()) { |
| 125 | + if (!Arg->getType()->isPointerTy()) |
| 126 | + continue; |
| 127 | + |
| 128 | + PtrArgs.push_back(Arg); |
| 129 | + } |
| 130 | + } |
| 131 | + |
| 132 | + if (PtrArgs.empty() && !IsFuncCall) |
| 133 | + continue; |
| 134 | + |
| 135 | + // Collect underlying objects of pointer arguments. |
| 136 | + auto Scopes = SmallVector<Metadata *, 4u>(); |
| 137 | + auto ObjSet = SmallPtrSet<Value const*, 4u>(); |
| 138 | + auto NoAliases = SmallVector<Metadata *, 4u>(); |
| 139 | + |
| 140 | + for (auto &Ptr : PtrArgs) { |
| 141 | + auto Objects = SmallVector<Value const *, 4u>(); |
| 142 | + getUnderlyingObjects(Ptr, Objects); |
| 143 | + ObjSet.insert_range(Objects); |
| 144 | + } |
| 145 | + |
| 146 | + auto RequiresNoCaptureBefore = false; |
| 147 | + auto UsesUnknownObject = false; |
| 148 | + auto UsesAliasingPtr = false; |
| 149 | + |
| 150 | + for (auto *Val : ObjSet) { |
| 151 | + if (isa<ConstantPointerNull>(Val) || isa<ConstantDataVector>(Val) || |
| 152 | + isa<ConstantInt>(Val) || isa<ConstantFP>(Val) || |
| 153 | + isa<UndefValue>(Val)) |
| 154 | + continue; |
| 155 | + |
| 156 | + if (auto *Arg = dyn_cast<Argument>(Val)) { |
| 157 | + if (!Arg->hasAttribute(Attribute::NoAlias)) |
| 158 | + UsesAliasingPtr = true; |
| 159 | + } |
| 160 | + else |
| 161 | + UsesAliasingPtr = true; |
| 162 | + |
| 163 | + if (isEscapeSource(Val)) |
| 164 | + RequiresNoCaptureBefore = true; |
| 165 | + else if (!isa<Argument>(Val) && isIdentifiedObject(Val)) |
| 166 | + UsesUnknownObject = true; |
| 167 | + } |
| 168 | + |
| 169 | + if (UsesUnknownObject) |
| 170 | + continue; |
| 171 | + |
| 172 | + // Collect noalias scopes for instruction. |
| 173 | + for (auto *Arg : NoAliasArgs) { |
| 174 | + if (ObjSet.contains(Arg)) |
| 175 | + continue; |
| 176 | + |
| 177 | + if (!RequiresNoCaptureBefore || |
| 178 | + !capturesAnything(PointerMayBeCapturedBefore( |
| 179 | + Arg, false, I, &DT, false, CaptureComponents::Provenance))) |
| 180 | + NoAliases.push_back(NewScopes[Arg]); |
| 181 | + } |
| 182 | + |
| 183 | + // Add noalias metadata to instruction. |
| 184 | + if (!NoAliases.empty()) { |
| 185 | + auto *NewMD = MDNode::concatenate( |
| 186 | + Inst->getMetadata(LLVMContext::MD_noalias), |
| 187 | + MDNode::get(F.getContext(), NoAliases)); |
| 188 | + Inst->setMetadata(LLVMContext::MD_noalias, NewMD); |
| 189 | + } |
| 190 | + |
| 191 | + // Collect scopes for alias.scope metadata. |
| 192 | + if (!UsesAliasingPtr) |
| 193 | + for (auto *Arg : NoAliasArgs) { |
| 194 | + if (ObjSet.count(Arg)) |
| 195 | + Scopes.push_back(NewScopes[Arg]); |
| 196 | + } |
| 197 | + |
| 198 | + // Add alias.scope metadata to instruction. |
| 199 | + if (!Scopes.empty()) { |
| 200 | + auto *NewMD = MDNode::concatenate( |
| 201 | + Inst->getMetadata(LLVMContext::MD_alias_scope), |
| 202 | + MDNode::get(F.getContext(), Scopes)); |
| 203 | + Inst->setMetadata(LLVMContext::MD_alias_scope, NewMD); |
| 204 | + } |
| 205 | + } |
| 206 | +} |
| 207 | + |
61 | 208 | static bool lowerKernelArguments(Function &F, const TargetMachine &TM) { |
62 | 209 | CallingConv::ID CC = F.getCallingConv(); |
63 | 210 | if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty()) |
@@ -86,6 +233,9 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) { |
86 | 233 | Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize)); |
87 | 234 |
|
88 | 235 | uint64_t ExplicitArgOffset = 0; |
| 236 | + |
| 237 | + addAliasScopeMetadata(F, F.getParent()->getDataLayout()); |
| 238 | + |
89 | 239 | for (Argument &Arg : F.args()) { |
90 | 240 | const bool IsByRef = Arg.hasByRefAttr(); |
91 | 241 | Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType(); |
@@ -124,11 +274,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) { |
124 | 274 | PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) && |
125 | 275 | !ST.hasUsableDSOffset()) |
126 | 276 | continue; |
127 | | - |
128 | | - // FIXME: We can replace this with equivalent alias.scope/noalias |
129 | | - // metadata, but this appears to be a lot of work. |
130 | | - if (Arg.hasNoAliasAttr()) |
131 | | - continue; |
132 | 277 | } |
133 | 278 |
|
134 | 279 | auto *VT = dyn_cast<FixedVectorType>(ArgTy); |
@@ -215,8 +360,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) { |
215 | 360 | } |
216 | 361 | } |
217 | 362 |
|
218 | | - // TODO: Convert noalias arg to !noalias |
219 | | - |
220 | 363 | if (DoShiftOpt) { |
221 | 364 | Value *ExtractBits = OffsetDiff == 0 ? |
222 | 365 | Load : Builder.CreateLShr(Load, OffsetDiff * 8); |
|
0 commit comments