Skip to content

Commit 9f4f13a

Browse files
PeddleSpam and Leon Clark authored
[AMDGPU] Propagate alias information in AMDGPULowerKernelArguments. (#161375)
Emit `!noalias` and `alias.scope` metadata for `noalias` kernel arguments. --------- Co-authored-by: Leon Clark <[email protected]>
1 parent f42af14 commit 9f4f13a

File tree

10 files changed

+1145
-590
lines changed

10 files changed

+1145
-590
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp

Lines changed: 139 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,26 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include "AMDGPU.h"
15+
#include "AMDGPUAsanInstrumentation.h"
1516
#include "GCNSubtarget.h"
17+
#include "llvm/Analysis/AliasAnalysis.h"
18+
#include "llvm/Analysis/CaptureTracking.h"
19+
#include "llvm/Analysis/ScopedNoAliasAA.h"
1620
#include "llvm/Analysis/ValueTracking.h"
1721
#include "llvm/CodeGen/TargetPassConfig.h"
22+
#include "llvm/IR/Argument.h"
1823
#include "llvm/IR/Attributes.h"
24+
#include "llvm/IR/Dominators.h"
1925
#include "llvm/IR/IRBuilder.h"
26+
#include "llvm/IR/InstIterator.h"
27+
#include "llvm/IR/Instruction.h"
28+
#include "llvm/IR/Instructions.h"
2029
#include "llvm/IR/IntrinsicsAMDGPU.h"
30+
#include "llvm/IR/LLVMContext.h"
2131
#include "llvm/IR/MDBuilder.h"
2232
#include "llvm/Target/TargetMachine.h"
33+
#include <optional>
34+
#include <string>
2335

2436
#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"
2537

@@ -37,6 +49,7 @@ class AMDGPULowerKernelArguments : public FunctionPass {
3749

3850
/// Declares the analyses this pass depends on: the target pass configuration
/// and the dominator tree wrapper pass. All analyses are reported as
/// preserved.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  // Required inputs, registered in the same order as before.
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<DominatorTreeWrapperPass>();

  // Report every analysis as preserved.
  AU.setPreservesAll();
}
4255
};
@@ -58,7 +71,125 @@ static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
5871
return InsPt;
5972
}
6073

61-
static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
74+
/// Attach `!alias.scope` and `!noalias` metadata to the memory-accessing
/// instructions of \p F, derived from the function's `noalias` arguments.
///
/// One anonymous alias scope is created for every used `noalias` argument,
/// all inside a single anonymous domain named after the function. For each
/// instruction that has a memory location, or is a call that may access
/// memory and takes at least one pointer argument:
///  - `!noalias` receives the scopes of the noalias arguments that are not
///    among the instruction's underlying objects (subject to a
///    capture-before check where a captured pointer could be reached);
///  - `!alias.scope` receives the scopes of the noalias arguments that are
///    among its underlying objects, but only when every underlying object is
///    a noalias argument and the instruction cannot touch other memory.
/// Existing metadata of either kind is kept and concatenated with the new
/// nodes.
///
/// \param F  Function whose instructions are annotated in place.
/// \param DL Not used by this function; kept so the signature is unchanged.
/// \param DT Dominator tree of \p F, passed to the capture-before query.
static void addAliasScopeMetadata(Function &F, const DataLayout &DL,
                                  DominatorTree &DT) {
  // Collect the noalias arguments that have at least one use.
  SmallVector<const Argument *, 4u> NoAliasArgs;

  for (Argument &Arg : F.args())
    if (Arg.hasNoAliasAttr() && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // Create one alias scope per noalias argument inside a fresh domain.
  LLVMContext &Ctx = F.getContext();
  MDBuilder MDB(Ctx);
  DenseMap<const Argument *, MDNode *> NewScopes;
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(F.getName());

  for (const Argument *Arg : NoAliasArgs)
    NewScopes.insert(
        {Arg, MDB.createAnonymousAliasScope(NewDomain, Arg->getName())});

  for (Instruction &Inst : instructions(F)) {
    Instruction *I = &Inst;

    // If the instruction accesses memory, collect its pointer operands.
    SmallVector<const Value *, 2u> PtrArgs;

    // Set for calls that may read or write memory other than what their
    // pointer arguments point to.
    bool MayAccessNonArgMemory = false;

    if (std::optional<MemoryLocation> MO = MemoryLocation::getOrNone(I)) {
      PtrArgs.push_back(MO->Ptr);
    } else if (const auto *Call = dyn_cast<CallBase>(I)) {
      if (Call->doesNotAccessMemory())
        continue;

      MayAccessNonArgMemory = !Call->onlyAccessesArgMemory() &&
                              !Call->onlyAccessesInaccessibleMemOrArgMem();

      for (Value *Arg : Call->args())
        if (Arg->getType()->isPointerTy())
          PtrArgs.push_back(Arg);
    }

    if (PtrArgs.empty())
      continue;

    // Collect the underlying objects of all pointer operands.
    SmallPtrSet<const Value *, 4u> ObjSet;

    for (const Value *Val : PtrArgs) {
      SmallVector<const Value *, 4u> Objects;
      getUnderlyingObjects(Val, Objects);
      ObjSet.insert_range(Objects);
    }

    // A call that can reach arbitrary memory can also reach a noalias
    // pointer that was captured earlier (e.g. through a global), so it needs
    // the capture-before check just like an escape source does.
    bool RequiresNoCaptureBefore = MayAccessNonArgMemory;
    bool UsesUnknownObject = false;
    bool UsesAliasingPtr = false;

    for (const Value *Val : ObjSet) {
      if (isa<ConstantData>(Val))
        continue;

      // Anything other than a noalias argument means the access cannot be
      // fully described by alias.scope metadata.
      const auto *Arg = dyn_cast<Argument>(Val);
      if (!Arg || !Arg->hasAttribute(Attribute::NoAlias))
        UsesAliasingPtr = true;

      if (isEscapeSource(Val)) {
        // An escape source can only alias a noalias argument if that
        // argument was captured beforehand.
        RequiresNoCaptureBefore = true;
      } else if (!Arg && !isIdentifiedObject(Val)) {
        // Neither an escape source, nor an argument, nor an identified
        // object: nothing is known about what it may alias, so
        // conservatively make no assumptions for this instruction.
        UsesUnknownObject = true;
      }
    }

    if (UsesUnknownObject)
      continue;

    // Collect the scopes of the noalias arguments this instruction is not
    // based on.
    SmallVector<Metadata *, 4u> NoAliases;

    for (const Argument *Arg : NoAliasArgs) {
      if (ObjSet.contains(Arg))
        continue;

      if (!RequiresNoCaptureBefore ||
          !capturesAnything(PointerMayBeCapturedBefore(
              Arg, /*ReturnCaptures=*/false, I, &DT, /*IncludeI=*/false,
              CaptureComponents::Provenance)))
        NoAliases.push_back(NewScopes.lookup(Arg));
    }

    // Add noalias metadata to the instruction, keeping any existing entries.
    if (!NoAliases.empty())
      I->setMetadata(
          LLVMContext::MD_noalias,
          MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias),
                              MDNode::get(Ctx, NoAliases)));

    // Collect the scopes for alias.scope metadata. A call that may touch
    // memory beyond its pointer arguments must not be tagged: alias.scope
    // would let other accesses that list these scopes in !noalias be treated
    // as independent of the call.
    SmallVector<Metadata *, 4u> Scopes;

    if (!UsesAliasingPtr && !MayAccessNonArgMemory)
      for (const Argument *Arg : NoAliasArgs)
        if (ObjSet.contains(Arg))
          Scopes.push_back(NewScopes.lookup(Arg));

    // Add alias.scope metadata to the instruction, keeping existing entries.
    if (!Scopes.empty())
      I->setMetadata(
          LLVMContext::MD_alias_scope,
          MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope),
                              MDNode::get(Ctx, Scopes)));
  }
}
190+
191+
static bool lowerKernelArguments(Function &F, const TargetMachine &TM,
192+
DominatorTree &DT) {
62193
CallingConv::ID CC = F.getCallingConv();
63194
if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
64195
return false;
@@ -86,6 +217,9 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
86217
Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
87218

88219
uint64_t ExplicitArgOffset = 0;
220+
221+
addAliasScopeMetadata(F, F.getParent()->getDataLayout(), DT);
222+
89223
for (Argument &Arg : F.args()) {
90224
const bool IsByRef = Arg.hasByRefAttr();
91225
Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
@@ -124,11 +258,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
124258
PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
125259
!ST.hasUsableDSOffset())
126260
continue;
127-
128-
// FIXME: We can replace this with equivalent alias.scope/noalias
129-
// metadata, but this appears to be a lot of work.
130-
if (Arg.hasNoAliasAttr())
131-
continue;
132261
}
133262

134263
auto *VT = dyn_cast<FixedVectorType>(ArgTy);
@@ -215,8 +344,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
215344
}
216345
}
217346

218-
// TODO: Convert noalias arg to !noalias
219-
220347
if (DoShiftOpt) {
221348
Value *ExtractBits = OffsetDiff == 0 ?
222349
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
@@ -245,7 +372,8 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
245372
/// Legacy pass-manager entry point. Obtains the target machine from
/// TargetPassConfig and the dominator tree from DominatorTreeWrapperPass,
/// then forwards to lowerKernelArguments and returns its result.
bool AMDGPULowerKernelArguments::runOnFunction(Function &F) {
  const TargetMachine &TM =
      getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  return lowerKernelArguments(F, TM, DT);
}
250378

251379
INITIALIZE_PASS_BEGIN(AMDGPULowerKernelArguments, DEBUG_TYPE,
@@ -261,7 +389,8 @@ FunctionPass *llvm::createAMDGPULowerKernelArgumentsPass() {
261389

262390
PreservedAnalyses
263391
AMDGPULowerKernelArgumentsPass::run(Function &F, FunctionAnalysisManager &AM) {
264-
bool Changed = lowerKernelArguments(F, TM);
392+
DominatorTree &DT = *AM.getCachedResult<DominatorTreeAnalysis>(F);
393+
bool Changed = lowerKernelArguments(F, TM, DT);
265394
if (Changed) {
266395
// TODO: Preserves a lot more.
267396
PreservedAnalyses PA;

0 commit comments

Comments
 (0)