1212// ===----------------------------------------------------------------------===//
1313
1414#include " AMDGPU.h"
15+ #include " AMDGPUAsanInstrumentation.h"
1516#include " GCNSubtarget.h"
17+ #include " llvm/Analysis/AliasAnalysis.h"
18+ #include " llvm/Analysis/CaptureTracking.h"
19+ #include " llvm/Analysis/ScopedNoAliasAA.h"
1620#include " llvm/Analysis/ValueTracking.h"
1721#include " llvm/CodeGen/TargetPassConfig.h"
22+ #include " llvm/IR/Argument.h"
1823#include " llvm/IR/Attributes.h"
24+ #include " llvm/IR/Dominators.h"
1925#include " llvm/IR/IRBuilder.h"
26+ #include " llvm/IR/InstIterator.h"
27+ #include " llvm/IR/Instruction.h"
28+ #include " llvm/IR/Instructions.h"
2029#include " llvm/IR/IntrinsicsAMDGPU.h"
30+ #include " llvm/IR/LLVMContext.h"
2131#include " llvm/IR/MDBuilder.h"
2232#include " llvm/Target/TargetMachine.h"
33+ #include < optional>
34+ #include < string>
2335
2436#define DEBUG_TYPE " amdgpu-lower-kernel-arguments"
2537
@@ -37,6 +49,7 @@ class AMDGPULowerKernelArguments : public FunctionPass {
3749
// Declares this pass's analysis dependencies: TargetPassConfig and
// DominatorTreeWrapperPass are required (the dominator tree is fetched in
// runOnFunction), and all analyses are marked as preserved.
3850 void getAnalysisUsage (AnalysisUsage &AU) const override {
3951 AU.addRequired <TargetPassConfig>();
52+ AU.addRequired <DominatorTreeWrapperPass>();
4053 AU.setPreservesAll ();
4154 }
4255};
@@ -58,7 +71,125 @@ static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
5871 return InsPt;
5972}
6073
61- static bool lowerKernelArguments (Function &F, const TargetMachine &TM) {
74+ static void addAliasScopeMetadata (Function &F, const DataLayout &DL,
75+ DominatorTree &DT) {
76+ // Collect noalias arguments.
77+ SmallVector<const Argument *, 4u > NoAliasArgs;
78+
79+ for (Argument &Arg : F.args ())
80+ if (Arg.hasNoAliasAttr () && !Arg.use_empty ())
81+ NoAliasArgs.push_back (&Arg);
82+
83+ if (NoAliasArgs.empty ())
84+ return ;
85+
86+ // Add alias scopes for each noalias argument.
87+ MDBuilder MDB (F.getContext ());
88+ DenseMap<const Argument *, MDNode *> NewScopes;
89+ MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain (F.getName ());
90+
91+ for (unsigned I = 0u ; I < NoAliasArgs.size (); ++I) {
92+ const Argument *Arg = NoAliasArgs[I];
93+ MDNode *NewScope = MDB.createAnonymousAliasScope (NewDomain, Arg->getName ());
94+ NewScopes.insert ({Arg, NewScope});
95+ }
96+
97+ // Iterate over all instructions.
98+ for (inst_iterator Inst = inst_begin (F), InstEnd = inst_end (F);
99+ Inst != InstEnd; ++Inst) {
100+ // If instruction accesses memory, collect its pointer arguments.
101+ Instruction *I = &(*Inst);
102+ SmallVector<const Value *, 2u > PtrArgs;
103+
104+ if (std::optional<MemoryLocation> MO = MemoryLocation::getOrNone (I))
105+ PtrArgs.push_back (MO->Ptr );
106+ else if (const CallBase *Call = dyn_cast<CallBase>(I)) {
107+ if (Call->doesNotAccessMemory ())
108+ continue ;
109+
110+ for (Value *Arg : Call->args ()) {
111+ if (!Arg->getType ()->isPointerTy ())
112+ continue ;
113+
114+ PtrArgs.push_back (Arg);
115+ }
116+ }
117+
118+ if (PtrArgs.empty ())
119+ continue ;
120+
121+ // Collect underlying objects of pointer arguments.
122+ SmallVector<Metadata *, 4u > Scopes;
123+ SmallPtrSet<const Value *, 4u > ObjSet;
124+ SmallVector<Metadata *, 4u > NoAliases;
125+
126+ for (const Value *Val : PtrArgs) {
127+ SmallVector<const Value *, 4u > Objects;
128+ getUnderlyingObjects (Val, Objects);
129+ ObjSet.insert_range (Objects);
130+ }
131+
132+ bool RequiresNoCaptureBefore = false ;
133+ bool UsesUnknownObject = false ;
134+ bool UsesAliasingPtr = false ;
135+
136+ for (const Value *Val : ObjSet) {
137+ if (isa<ConstantData>(Val))
138+ continue ;
139+
140+ if (const Argument *Arg = dyn_cast<Argument>(Val)) {
141+ if (!Arg->hasAttribute (Attribute::NoAlias))
142+ UsesAliasingPtr = true ;
143+ } else
144+ UsesAliasingPtr = true ;
145+
146+ if (isEscapeSource (Val))
147+ RequiresNoCaptureBefore = true ;
148+ else if (!isa<Argument>(Val) && isIdentifiedObject (Val))
149+ UsesUnknownObject = true ;
150+ }
151+
152+ if (UsesUnknownObject)
153+ continue ;
154+
155+ // Collect noalias scopes for instruction.
156+ for (const Argument *Arg : NoAliasArgs) {
157+ if (ObjSet.contains (Arg))
158+ continue ;
159+
160+ if (!RequiresNoCaptureBefore ||
161+ !capturesAnything (PointerMayBeCapturedBefore (
162+ Arg, false , I, &DT, false , CaptureComponents::Provenance)))
163+ NoAliases.push_back (NewScopes[Arg]);
164+ }
165+
166+ // Add noalias metadata to instruction.
167+ if (!NoAliases.empty ()) {
168+ MDNode *NewMD =
169+ MDNode::concatenate (Inst->getMetadata (LLVMContext::MD_noalias),
170+ MDNode::get (F.getContext (), NoAliases));
171+ Inst->setMetadata (LLVMContext::MD_noalias, NewMD);
172+ }
173+
174+ // Collect scopes for alias.scope metadata.
175+ if (!UsesAliasingPtr)
176+ for (const Argument *Arg : NoAliasArgs) {
177+ if (ObjSet.count (Arg))
178+ Scopes.push_back (NewScopes[Arg]);
179+ }
180+
181+ // Add alias.scope metadata to instruction.
182+ if (!Scopes.empty ()) {
183+ MDNode *NewMD =
184+ MDNode::concatenate (Inst->getMetadata (LLVMContext::MD_alias_scope),
185+ MDNode::get (F.getContext (), Scopes));
186+ Inst->setMetadata (LLVMContext::MD_alias_scope, NewMD);
187+ }
188+ }
189+ }
190+
191+ static bool lowerKernelArguments (Function &F, const TargetMachine &TM,
192+ DominatorTree &DT) {
62193 CallingConv::ID CC = F.getCallingConv ();
63194 if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty ())
64195 return false ;
@@ -86,6 +217,9 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
86217 Attribute::getWithDereferenceableBytes (Ctx, TotalKernArgSize));
87218
88219 uint64_t ExplicitArgOffset = 0 ;
220+
221+ addAliasScopeMetadata (F, F.getParent ()->getDataLayout (), DT);
222+
89223 for (Argument &Arg : F.args ()) {
90224 const bool IsByRef = Arg.hasByRefAttr ();
91225 Type *ArgTy = IsByRef ? Arg.getParamByRefType () : Arg.getType ();
@@ -124,11 +258,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
124258 PT->getAddressSpace () == AMDGPUAS::REGION_ADDRESS) &&
125259 !ST.hasUsableDSOffset ())
126260 continue ;
127-
128- // FIXME: We can replace this with equivalent alias.scope/noalias
129- // metadata, but this appears to be a lot of work.
130- if (Arg.hasNoAliasAttr ())
131- continue ;
132261 }
133262
134263 auto *VT = dyn_cast<FixedVectorType>(ArgTy);
@@ -215,8 +344,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
215344 }
216345 }
217346
218- // TODO: Convert noalias arg to !noalias
219-
220347 if (DoShiftOpt) {
221348 Value *ExtractBits = OffsetDiff == 0 ?
222349 Load : Builder.CreateLShr (Load, OffsetDiff * 8 );
@@ -245,7 +372,8 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
// Legacy pass-manager entry point: obtains the TargetMachine from
// TargetPassConfig and the dominator tree from DominatorTreeWrapperPass, then
// forwards to lowerKernelArguments and returns its result.
245372bool AMDGPULowerKernelArguments::runOnFunction (Function &F) {
246373 auto &TPC = getAnalysis<TargetPassConfig>();
247374 const TargetMachine &TM = TPC.getTM <TargetMachine>();
248- return lowerKernelArguments (F, TM);
375+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree ();
376+ return lowerKernelArguments (F, TM, DT);
249377}
250378
251379INITIALIZE_PASS_BEGIN (AMDGPULowerKernelArguments, DEBUG_TYPE,
@@ -261,7 +389,8 @@ FunctionPass *llvm::createAMDGPULowerKernelArgumentsPass() {
261389
262390PreservedAnalyses
263391AMDGPULowerKernelArgumentsPass::run (Function &F, FunctionAnalysisManager &AM) {
264- bool Changed = lowerKernelArguments (F, TM);
392+ DominatorTree &DT = *AM.getCachedResult <DominatorTreeAnalysis>(F);
393+ bool Changed = lowerKernelArguments (F, TM, DT);
265394 if (Changed) {
266395 // TODO: Preserves a lot more.
267396 PreservedAnalyses PA;
0 commit comments