Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 149 additions & 7 deletions llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,22 @@

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/Target/TargetMachine.h"
#include <string>

#define DEBUG_TYPE "amdgpu-lower-kernel-arguments"

Expand Down Expand Up @@ -58,6 +67,143 @@ static BasicBlock::iterator getInsertPt(BasicBlock &BB) {
return InsPt;
}

/// Express the `noalias` attribute of the arguments of \p F as
/// `!alias.scope` / `!noalias` metadata on the instructions of \p F that
/// access memory.
///
/// One anonymous alias scope is created per used `noalias` argument, all inside
/// a single anonymous domain named after the function. Then, for every load,
/// store, va_arg, cmpxchg, atomicrmw and memory-accessing call:
///   * `!noalias` receives the scope of every noalias argument the instruction
///     provably cannot be accessing, and
///   * `!alias.scope` receives the scope of every noalias argument the
///     instruction is based on, provided it is based on noalias arguments only.
/// Existing metadata of either kind is preserved and extended.
///
/// \param F  Function whose instructions are annotated in place.
/// \param DL Data layout of the enclosing module (currently unused here).
static void addAliasScopeMetadata(Function &F, DataLayout const &DL) {
  // Collect noalias arguments. Unused ones cannot be accessed by anything, so
  // they do not need a scope.
  SmallVector<Argument const *, 4u> NoAliasArgs;

  for (Argument &Arg : F.args())
    if (Arg.hasNoAliasAttr() && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // Add alias scopes for each noalias argument.
  MDBuilder MDB(F.getContext());
  DenseMap<Argument const *, MDNode *> NewScopes;
  MDNode *NewDomain = MDB.createAnonymousAliasScopeDomain(F.getName());

  for (unsigned I = 0u; I < NoAliasArgs.size(); ++I) {
    Argument const *Arg = NoAliasArgs[I];
    std::string Name(F.getName());

    // Note: for unnamed arguments the number is the position within the list
    // of noalias arguments, not the IR argument number.
    if (Arg->hasName())
      Name += std::string(": %") + std::string(Arg->getName());
    else
      Name += std::string(": argument ") + std::to_string(I);

    MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
    NewScopes.insert(std::make_pair(Arg, NewScope));
  }

  // The dominator tree is needed by the capture-before queries below.
  DominatorTree DT;
  DT.recalculate(F);

  // Iterate over all instructions.
  for (Instruction &Inst : instructions(F)) {
    // If instruction accesses memory, collect its pointer arguments.
    Instruction *I = &Inst;
    bool IsFuncCall = false;
    bool IsArgMemOnlyCall = false;
    SmallVector<Value const *, 2u> PtrArgs;

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      PtrArgs.push_back(LI->getPointerOperand());
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      PtrArgs.push_back(SI->getPointerOperand());
    } else if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I)) {
      PtrArgs.push_back(VAAI->getPointerOperand());
    } else if (AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I)) {
      PtrArgs.push_back(CXI->getPointerOperand());
    } else if (AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I)) {
      PtrArgs.push_back(RMWI->getPointerOperand());
    } else if (CallBase *Call = dyn_cast<CallBase>(I)) {
      // A call that touches no memory needs no aliasing information.
      if (Call->doesNotAccessMemory())
        continue;

      IsFuncCall = true;
      IsArgMemOnlyCall = Call->onlyAccessesArgMemory();

      // Only pointer operands matter: reaching a noalias argument through a
      // non-pointer operand requires capturing it first, and captures are
      // checked separately below.
      for (Use &Arg : Call->args()) {
        if (!Arg->getType()->isPointerTy())
          continue;

        PtrArgs.push_back(Arg);
      }
    }

    // Instructions without pointer operands are irrelevant, except calls,
    // which may simply alias none of the noalias arguments.
    if (PtrArgs.empty() && !IsFuncCall)
      continue;

    // Collect underlying objects of pointer arguments.
    SmallVector<Metadata *, 4u> Scopes;
    SmallPtrSet<Value const *, 4u> ObjSet;
    SmallVector<Metadata *, 4u> NoAliases;

    for (Value const *Ptr : PtrArgs) {
      SmallVector<Value const *, 4u> Objects;
      getUnderlyingObjects(Ptr, Objects);
      ObjSet.insert_range(Objects);
    }

    bool RequiresNoCaptureBefore = false;
    bool UsesUnknownObject = false;
    bool UsesAliasingPtr = false;

    for (Value const *Val : ObjSet) {
      // Constants that cannot be derived from any pointer value are harmless.
      if (isa<ConstantPointerNull>(Val) || isa<ConstantDataVector>(Val) ||
          isa<ConstantInt>(Val) || isa<ConstantFP>(Val) || isa<UndefValue>(Val))
        continue;

      // Anything other than a noalias argument means alias.scope metadata
      // cannot fully describe this access, so none will be added.
      if (Argument const *Arg = dyn_cast<Argument>(Val)) {
        if (!Arg->hasAttribute(Attribute::NoAlias))
          UsesAliasingPtr = true;
      } else {
        UsesAliasingPtr = true;
      }

      if (isEscapeSource(Val)) {
        // An escape source can only alias a noalias argument if that argument
        // was captured beforehand.
        RequiresNoCaptureBefore = true;
      } else if (!isa<Argument>(Val) && !isIdentifiedObject(Val)) {
        // Neither an escape source, nor an argument, nor an identified object
        // (which cannot alias a noalias argument): the origin of the pointer
        // is unknown, so conservatively make no assumptions.
        UsesUnknownObject = true;
      }
    }

    // Nothing can be said if an underlying object could not be determined.
    if (UsesUnknownObject)
      continue;

    // A call may reach a captured noalias pointer through memory other than
    // its pointer operands (globals, pointees of other pointers, ...), unless
    // it is known to access only the memory its arguments point to.
    if (IsFuncCall && !IsArgMemOnlyCall)
      RequiresNoCaptureBefore = true;

    // Collect noalias scopes for instruction: every noalias argument this
    // instruction is not based on, subject to the capture check when needed.
    for (Argument const *Arg : NoAliasArgs) {
      if (ObjSet.contains(Arg))
        continue;

      if (!RequiresNoCaptureBefore ||
          !capturesAnything(PointerMayBeCapturedBefore(
              Arg, false, I, &DT, false, CaptureComponents::Provenance)))
        NoAliases.push_back(NewScopes.lookup(Arg));
    }

    // Add noalias metadata to instruction.
    if (!NoAliases.empty()) {
      MDNode *NewMD =
          MDNode::concatenate(I->getMetadata(LLVMContext::MD_noalias),
                              MDNode::get(F.getContext(), NoAliases));
      I->setMetadata(LLVMContext::MD_noalias, NewMD);
    }

    // Collect scopes for alias.scope metadata. Only valid when the access is
    // based exclusively on noalias arguments.
    if (!UsesAliasingPtr)
      for (Argument const *Arg : NoAliasArgs)
        if (ObjSet.contains(Arg))
          Scopes.push_back(NewScopes.lookup(Arg));

    // Add alias.scope metadata to instruction.
    if (!Scopes.empty()) {
      MDNode *NewMD =
          MDNode::concatenate(I->getMetadata(LLVMContext::MD_alias_scope),
                              MDNode::get(F.getContext(), Scopes));
      I->setMetadata(LLVMContext::MD_alias_scope, NewMD);
    }
  }
}

static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::AMDGPU_KERNEL || F.arg_empty())
Expand Down Expand Up @@ -86,6 +232,9 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
Attribute::getWithDereferenceableBytes(Ctx, TotalKernArgSize));

uint64_t ExplicitArgOffset = 0;

addAliasScopeMetadata(F, F.getParent()->getDataLayout());

for (Argument &Arg : F.args()) {
const bool IsByRef = Arg.hasByRefAttr();
Type *ArgTy = IsByRef ? Arg.getParamByRefType() : Arg.getType();
Expand Down Expand Up @@ -124,11 +273,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
PT->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) &&
!ST.hasUsableDSOffset())
continue;

// FIXME: We can replace this with equivalent alias.scope/noalias
// metadata, but this appears to be a lot of work.
if (Arg.hasNoAliasAttr())
continue;
}

auto *VT = dyn_cast<FixedVectorType>(ArgTy);
Expand Down Expand Up @@ -215,8 +359,6 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
}
}

// TODO: Convert noalias arg to !noalias

if (DoShiftOpt) {
Value *ExtractBits = OffsetDiff == 0 ?
Load : Builder.CreateLShr(Load, OffsetDiff * 8);
Expand Down
Loading