Skip to content

Commit 7396ea1

Browse files
author
Leon Clark
committed
Update kernarg lowering and tests.
1 parent a45b5f4 commit 7396ea1

File tree

9 files changed

+876
-670
lines changed

9 files changed

+876
-670
lines changed

llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp

Lines changed: 150 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,21 @@
1313

1414
#include "AMDGPU.h"
1515
#include "GCNSubtarget.h"
16+
#include "llvm/Analysis/AliasAnalysis.h"
17+
#include "llvm/Analysis/CaptureTracking.h"
18+
#include "llvm/Analysis/ScopedNoAliasAA.h"
1619
#include "llvm/Analysis/ValueTracking.h"
1720
#include "llvm/CodeGen/TargetPassConfig.h"
21+
#include "llvm/IR/Argument.h"
1822
#include "llvm/IR/Attributes.h"
23+
#include "llvm/IR/Dominators.h"
1924
#include "llvm/IR/IRBuilder.h"
25+
#include "llvm/IR/InstIterator.h"
2026
#include "llvm/IR/IntrinsicsAMDGPU.h"
27+
#include "llvm/IR/LLVMContext.h"
2128
#include "llvm/IR/MDBuilder.h"
2229
#include "llvm/Target/TargetMachine.h"
30+
#include <string>
2331

2432
#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"
2533

@@ -58,6 +66,145 @@ static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
5866
return InsPt;
5967
}
6068

69+
/// Attach !noalias / !alias.scope metadata, derived from the noalias pointer
/// arguments of \p F, to the memory-accessing instructions of \p F.
///
/// One anonymous alias scope is created for every noalias argument that has
/// at least one use; all scopes live in a single anonymous domain named after
/// the function. For each load, store, va_arg, cmpxchg, atomicrmw and each
/// call that may access memory:
///  * the scope of every collected argument that is NOT among the underlying
///    objects of the instruction's pointer operands is appended to !noalias
///    (guarded by a capture query when an underlying object is an escape
///    source);
///  * if every underlying object (other than the skipped constants) is a
///    noalias argument, the scopes of the collected arguments that ARE among
///    the underlying objects are appended to !alias.scope.
///
/// Existing metadata on an instruction is preserved; new scopes are
/// concatenated onto it.
///
/// \param F  Function whose instructions are annotated in place.
/// \param DL Currently not read by this function.
static void addAliasScopeMetadata(Function &F, DataLayout const &DL) {
  // Collect the noalias arguments that are actually used.
  SmallVector<const Argument *, 4> NoAliasArgs;
  for (const Argument &Arg : F.args())
    if (Arg.hasNoAliasAttr() && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // Create one alias scope per collected argument inside a fresh domain.
  LLVMContext &Ctx = F.getContext();
  MDBuilder MDB(Ctx);
  DenseMap<const Argument *, MDNode *> NewScopes;
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(F.getName());

  for (unsigned Idx = 0, E = NoAliasArgs.size(); Idx != E; ++Idx) {
    const Argument *Arg = NoAliasArgs[Idx];

    // Scope name: "<function>: %<arg>" for named arguments, otherwise
    // "<function>: argument <N>". Note that N is the position within
    // NoAliasArgs, not the argument's index in the function signature.
    std::string Name = F.getName().str();
    if (Arg->hasName()) {
      Name += ": %";
      Name += Arg->getName();
    } else {
      Name += ": argument ";
      Name += std::to_string(Idx);
    }

    NewScopes.insert({Arg, MDB.createAnonymousAliasScope(NewDomain, Name)});
  }

  // The dominator tree is consumed by the capture query further down.
  DominatorTree DT(F);

  for (Instruction &I : instructions(F)) {
    // If the instruction accesses memory, collect its pointer operands.
    SmallVector<const Value *, 2> PtrArgs;
    bool IsFuncCall = false;

    if (const auto *LI = dyn_cast<LoadInst>(&I)) {
      PtrArgs.push_back(LI->getPointerOperand());
    } else if (const auto *SI = dyn_cast<StoreInst>(&I)) {
      PtrArgs.push_back(SI->getPointerOperand());
    } else if (const auto *VAAI = dyn_cast<VAArgInst>(&I)) {
      PtrArgs.push_back(VAAI->getPointerOperand());
    } else if (const auto *CXI = dyn_cast<AtomicCmpXchgInst>(&I)) {
      PtrArgs.push_back(CXI->getPointerOperand());
    } else if (const auto *RMWI = dyn_cast<AtomicRMWInst>(&I)) {
      PtrArgs.push_back(RMWI->getPointerOperand());
    } else if (const auto *Call = dyn_cast<CallBase>(&I)) {
      if (Call->doesNotAccessMemory())
        continue;

      IsFuncCall = true;
      for (const Use &U : Call->args())
        if (U->getType()->isPointerTy())
          PtrArgs.push_back(U.get());
    }

    // Calls are processed even when they take no pointer arguments; any
    // other instruction without a pointer operand is skipped.
    if (PtrArgs.empty() && !IsFuncCall)
      continue;

    // Collect the underlying objects of all pointer operands.
    SmallPtrSet<const Value *, 4> ObjSet;
    for (const Value *Ptr : PtrArgs) {
      SmallVector<const Value *, 4> Objects;
      getUnderlyingObjects(Ptr, Objects);
      ObjSet.insert_range(Objects);
    }

    bool RequiresNoCaptureBefore = false;
    bool UsesUnknownObject = false;
    bool UsesAliasingPtr = false;

    for (const Value *Val : ObjSet) {
      // These constants do not influence any of the flags below.
      if (isa<ConstantPointerNull, ConstantDataVector, ConstantInt, ConstantFP,
              UndefValue>(Val))
        continue;

      // Anything other than a noalias argument counts as an aliasing pointer.
      const auto *Arg = dyn_cast<Argument>(Val);
      if (!Arg || !Arg->hasAttribute(Attribute::NoAlias))
        UsesAliasingPtr = true;

      // NOTE(review): the comparable check in the inliner's alias-scope code
      // appears to use `!isIdentifiedFunctionLocal(V)` here; please confirm
      // that `isIdentifiedObject` (without negation) is the intended test.
      if (isEscapeSource(Val))
        RequiresNoCaptureBefore = true;
      else if (!Arg && isIdentifiedObject(Val))
        UsesUnknownObject = true;
    }

    if (UsesUnknownObject)
      continue;

    // Collect the !noalias scopes: every collected argument the instruction
    // is not based on, unless an escape source is involved and the argument
    // may have been captured before this instruction.
    SmallVector<Metadata *, 4> NoAliases;
    for (const Argument *Arg : NoAliasArgs) {
      if (ObjSet.contains(Arg))
        continue;

      if (!RequiresNoCaptureBefore ||
          !capturesAnything(PointerMayBeCapturedBefore(
              Arg, /*ReturnCaptures=*/false, &I, &DT, /*IncludeI=*/false,
              CaptureComponents::Provenance)))
        NoAliases.push_back(NewScopes.lookup(Arg));
    }

    if (!NoAliases.empty())
      I.setMetadata(LLVMContext::MD_noalias,
                    MDNode::concatenate(I.getMetadata(LLVMContext::MD_noalias),
                                        MDNode::get(Ctx, NoAliases)));

    // Collect the !alias.scope scopes: only when every relevant underlying
    // object is a noalias argument.
    SmallVector<Metadata *, 4> Scopes;
    if (!UsesAliasingPtr)
      for (const Argument *Arg : NoAliasArgs)
        if (ObjSet.contains(Arg))
          Scopes.push_back(NewScopes.lookup(Arg));

    if (!Scopes.empty())
      I.setMetadata(
          LLVMContext::MD_alias_scope,
          MDNode::concatenate(I.getMetadata(LLVMContext::MD_alias_scope),
                              MDNode::get(Ctx, Scopes)));
  }
}
207+
61208
static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
62209
CallingConv::ID CC = F.getCallingConv();
63210
if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
@@ -86,6 +233,9 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
86233
Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
87234

88235
uint64_t ExplicitArgOffset = 0;
236+
237+
addAliasScopeMetadata(F, F.getParent()->getDataLayout());
238+
89239
for (Argument &Arg : F.args()) {
90240
const bool IsByRef = Arg.hasByRefAttr();
91241
Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
@@ -124,11 +274,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
124274
PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
125275
!ST.hasUsableDSOffset())
126276
continue;
127-
128-
// FIXME: We can replace this with equivalent alias.scope/noalias
129-
// metadata, but this appears to be a lot of work.
130-
if (Arg.hasNoAliasAttr())
131-
continue;
132277
}
133278

134279
auto *VT = dyn_cast<FixedVectorType>(ArgTy);
@@ -215,8 +360,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
215360
}
216361
}
217362

218-
// TODO: Convert noalias arg to !noalias
219-
220363
if (DoShiftOpt) {
221364
Value *ExtractBits = OffsetDiff == 0 ?
222365
Load : Builder.CreateLShr(Load, OffsetDiff * 8);

0 commit comments

Comments
 (0)