Skip to content

Commit 1a4d72f

Browse files
author
Leon Clark
committed
[AMDGPU] Propagate alias information in AMDGPULowerKernelArguments.
This patch reimplements https://reviews.llvm.org/D108363 and https://reviews.llvm.org/D108361 to emit !noalias and !alias.scope metadata for noalias kernel arguments.
1 parent 680b3b7 commit 1a4d72f

File tree

12 files changed

+2453
-1291
lines changed

12 files changed

+2453
-1291
lines changed

llvm/include/llvm/Transforms/Utils/Cloning.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,17 @@ void updateProfileCallee(
362362
Function *Callee, int64_t EntryDelta,
363363
const ValueMap<const Value *, WeakTrackingVH> *VMap = nullptr);
364364

365+
/// Adds `!noalias` and `!alias.scope` metadata for `CB`'s called function's
366+
/// `noalias` argument based memory accesses.
367+
void addAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
368+
const DataLayout &DL, AAResults *CalleeAAR,
369+
ClonedCodeInfo &InlinedFunctionInfo,
370+
bool UseNoAliasIntrinsic);
371+
372+
/// Adds `!noalias` and `!alias.scope` metadata for `F`'s `noalias` argument
373+
/// based memory accesses.
374+
void addAliasScopeMetadata(Function &F);
375+
365376
/// Find the 'llvm.experimental.noalias.scope.decl' intrinsics in the specified
366377
/// basic blocks and extract their scope. These are candidates for duplication
367378
/// when cloning.

llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/IR/IntrinsicsAMDGPU.h"
2121
#include "llvm/IR/MDBuilder.h"
2222
#include "llvm/Target/TargetMachine.h"
23+
#include "llvm/Transforms/Utils/Cloning.h"
2324

2425
#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"
2526

@@ -86,6 +87,9 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
8687
Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));
8788

8889
uint64_t ExplicitArgOffset = 0;
90+
91+
addAliasScopeMetadata(F);
92+
8993
for (Argument &Arg : F.args()) {
9094
const bool IsByRef = Arg.hasByRefAttr();
9195
Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
@@ -124,11 +128,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
124128
PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
125129
!ST.hasUsableDSOffset())
126130
continue;
127-
128-
// FIXME: We can replace this with equivalent alias.scope/noalias
129-
// metadata, but this appears to be a lot of work.
130-
if (Arg.hasNoAliasAttr())
131-
continue;
132131
}
133132

134133
auto *VT = dyn_cast<FixedVectorType>(ArgTy);
@@ -215,8 +214,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
215214
}
216215
}
217216

218-
// TODO: Convert noalias arg to !noalias
219-
220217
if (DoShiftOpt) {
221218
Value *ExtractBits = OffsetDiff == 0 ?
222219
Load : Builder.CreateLShr(Load, OffsetDiff * 8);

llvm/lib/Transforms/Utils/InlineFunction.cpp

Lines changed: 73 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
#include "llvm/IR/GlobalVariable.h"
5252
#include "llvm/IR/IRBuilder.h"
5353
#include "llvm/IR/InlineAsm.h"
54+
#include "llvm/IR/InstIterator.h"
5455
#include "llvm/IR/InstrTypes.h"
5556
#include "llvm/IR/Instruction.h"
5657
#include "llvm/IR/Instructions.h"
@@ -1111,17 +1112,30 @@ void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
11111112
/// then add new alias scopes for each noalias argument, tag the mapped noalias
11121113
/// parameters with noalias metadata specifying the new scope, and tag all
11131114
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
1114-
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
1115-
const DataLayout &DL, AAResults *CalleeAAR,
1116-
ClonedCodeInfo &InlinedFunctionInfo) {
1117-
if (!EnableNoAliasConversion)
1118-
return;
1119-
1120-
const Function *CalledFunc = CB.getCalledFunction();
1115+
static void addAliasScopeMetadataImpl(CallBase *CB, Function *F,
1116+
ValueToValueMapTy *VMap,
1117+
const DataLayout &DL,
1118+
AAResults *CalleeAAR,
1119+
ClonedCodeInfo *InlinedFunctionInfo,
1120+
bool UseNoAliasIntrinsic) {
1121+
assert(CB || F);
1122+
const Function *CalledFunc = CB ? CB->getCalledFunction() : F;
11211123
SmallVector<const Argument *, 4> NoAliasArgs;
11221124

1125+
std::function<bool(const Argument *, Attribute::AttrKind)> paramHasAttr;
1126+
if (CB) {
1127+
paramHasAttr = [&](const Argument *Arg, Attribute::AttrKind Attr) -> bool {
1128+
return CB->paramHasAttr(Arg->getArgNo(), Attr);
1129+
};
1130+
1131+
} else {
1132+
paramHasAttr = [&](const Argument *Arg, Attribute::AttrKind Attr) -> bool {
1133+
return Arg->hasAttribute(Attr);
1134+
};
1135+
}
1136+
11231137
for (const Argument &Arg : CalledFunc->args())
1124-
if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
1138+
if (paramHasAttr(&Arg, Attribute::NoAlias) && !Arg.use_empty())
11251139
NoAliasArgs.push_back(&Arg);
11261140

11271141
if (NoAliasArgs.empty())
@@ -1163,29 +1177,20 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
11631177
NewScopes.insert(std::make_pair(A, NewScope));
11641178

11651179
if (UseNoAliasIntrinsic) {
1180+
assert(CB);
11661181
// Introduce a llvm.experimental.noalias.scope.decl for the noalias
11671182
// argument.
11681183
MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
11691184
auto *NoAliasDecl =
1170-
IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
1185+
IRBuilder<>(CB).CreateNoAliasScopeDeclaration(AScopeList);
11711186
// Ignore the result for now. The result will be used when the
11721187
// llvm.noalias intrinsic is introduced.
11731188
(void)NoAliasDecl;
11741189
}
11751190
}
11761191

1177-
// Iterate over all new instructions in the map; for all memory-access
1178-
// instructions, add the alias scope metadata.
1179-
for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
1180-
VMI != VMIE; ++VMI) {
1181-
if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
1182-
if (!VMI->second)
1183-
continue;
1184-
1185-
Instruction *NI = dyn_cast<Instruction>(VMI->second);
1186-
if (!NI || InlinedFunctionInfo.isSimplified(I, NI))
1187-
continue;
1188-
1192+
{
1193+
auto addAliasMD = [&](const Instruction *I, Instruction *NI) -> void {
11891194
bool IsArgMemOnlyCall = false, IsFuncCall = false;
11901195
SmallVector<const Value *, 2> PtrArgs;
11911196

@@ -1204,15 +1209,15 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
12041209
// know that about the inlined clone of this call site, and we don't
12051210
// need to add metadata.
12061211
if (Call->doesNotAccessMemory())
1207-
continue;
1212+
return;
12081213

12091214
IsFuncCall = true;
12101215
if (CalleeAAR) {
12111216
MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);
12121217

12131218
// We'll retain this knowledge without additional metadata.
12141219
if (ME.onlyAccessesInaccessibleMem())
1215-
continue;
1220+
return;
12161221

12171222
if (ME.onlyAccessesArgPointees())
12181223
IsArgMemOnlyCall = true;
@@ -1234,7 +1239,7 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
12341239
// However, if this is a call, then we might just alias with none of the
12351240
// noalias arguments.
12361241
if (PtrArgs.empty() && !IsFuncCall)
1237-
continue;
1242+
return;
12381243

12391244
// It is possible that there is only one underlying object, but you
12401245
// need to go through several PHIs to see it, and thus could be
@@ -1267,7 +1272,7 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
12671272
// completely describe the aliasing properties using alias.scope
12681273
// metadata (and, thus, won't add any).
12691274
if (const Argument *A = dyn_cast<Argument>(V)) {
1270-
if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
1275+
if (!paramHasAttr(A, Attribute::NoAlias))
12711276
UsesAliasingPtr = true;
12721277
} else {
12731278
UsesAliasingPtr = true;
@@ -1289,7 +1294,7 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
12891294
// Nothing we can do if the used underlying object cannot be reliably
12901295
// determined.
12911296
if (UsesUnknownObject)
1292-
continue;
1297+
return;
12931298

12941299
// A function call can always get captured noalias pointers (via other
12951300
// parameters, globals, etc.).
@@ -1348,10 +1353,49 @@ static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
13481353
LLVMContext::MD_alias_scope,
13491354
MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
13501355
MDNode::get(CalledFunc->getContext(), Scopes)));
1356+
};
1357+
1358+
if (VMap) {
1359+
assert(InlinedFunctionInfo);
1360+
1361+
for (ValueToValueMapTy::iterator VMI = VMap->begin(), VMIE = VMap->end();
1362+
VMI != VMIE; ++VMI) {
1363+
const Instruction *I = dyn_cast<Instruction>(VMI->first);
1364+
if (!I || !VMI->second)
1365+
continue;
1366+
1367+
Instruction *NI = dyn_cast<Instruction>(VMI->second);
1368+
if (!NI || InlinedFunctionInfo->isSimplified(I, NI))
1369+
continue;
1370+
1371+
addAliasMD(I, NI);
1372+
}
1373+
1374+
} else {
1375+
for (auto It = inst_begin(F), End = inst_end(F); It != End; ++It) {
1376+
Instruction *I = &(*It);
1377+
addAliasMD(I, I);
1378+
}
13511379
}
13521380
}
13531381
}
13541382

1383+
/// Public entry point for the inlining case. Forwards the call site `CB`, the
/// value map `VMap` and `InlinedFunctionInfo` by address to
/// addAliasScopeMetadataImpl with a null `F`, so the implementation takes the
/// callee from `CB` and walks the (original, clone) instruction pairs recorded
/// in `VMap`. `DL`, `CalleeAAR` and `UseNoAliasIntrinsic` are passed through
/// unchanged.
void llvm::addAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
1384+
const DataLayout &DL, AAResults *CalleeAAR,
1385+
ClonedCodeInfo &InlinedFunctionInfo,
1386+
bool UseNoAliasIntrinsic) {
1387+
addAliasScopeMetadataImpl(&CB, /* F */ nullptr, &VMap, DL, CalleeAAR,
1388+
&InlinedFunctionInfo, UseNoAliasIntrinsic);
1389+
}
1390+
1391+
/// Public entry point for annotating a function in place. Forwards `F` to
/// addAliasScopeMetadataImpl with a null call site, value map, callee alias
/// analysis and cloned-code info, so the implementation reads `noalias` from
/// `F`'s own arguments and visits `F`'s instructions directly. The data layout
/// is obtained through `F.getParent()`. UseNoAliasIntrinsic is false because
/// the implementation asserts a non-null call site on that path.
void llvm::addAliasScopeMetadata(Function &F) {
1392+
addAliasScopeMetadataImpl(/* CB */ nullptr, &F, /* VMap */ nullptr,
1393+
F.getParent()->getDataLayout(),
1394+
/* CalleeAAR */ nullptr,
1395+
/* InlinedFunctionInfo */ nullptr,
1396+
/* UseNoAliasIntrinsic */ false);
1397+
}
1398+
13551399
static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
13561400
ReturnInst *End) {
13571401

@@ -2795,7 +2839,9 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,
27952839
SAMetadataCloner.remap(FirstNewBlock, Caller->end());
27962840

27972841
// Add noalias metadata if necessary.
2798-
AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);
2842+
if (EnableNoAliasConversion)
2843+
addAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo,
2844+
UseNoAliasIntrinsic);
27992845

28002846
// Clone return attributes on the callsite into the calls within the inlined
28012847
// function which feed into its return value.

0 commit comments

Comments
 (0)