Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/include/llvm/Target/TargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ class LLVM_ABI TargetMachine {
/// The LLVM Module owns a DataLayout that is used for the target independent
/// optimizations and code generation. This hook provides a target specific
/// check on the validity of this DataLayout.
bool isCompatibleDataLayout(const DataLayout &Candidate) const {
virtual bool isCompatibleDataLayout(const DataLayout &Candidate) const {
return DL == Candidate;
}

Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/IR/Verifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@
#include "llvm/Pass.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/AMDGPUAddrSpace.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
Expand Down Expand Up @@ -4498,6 +4499,13 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
"alloca on amdgpu must be in addrspace(5)", &AI);
}

if (TT.isNVPTX()) {
Check(AI.getAddressSpace() == NVPTXAS::ADDRESS_SPACE_LOCAL ||
AI.getAddressSpace() == NVPTXAS::ADDRESS_SPACE_GENERIC,
"AllocaInst can only be in Generic or Local address space for NVPTX.",
&AI);
}

visitInstruction(AI);
}

Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1484,7 +1484,8 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
if (NumBytes) {
O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
<< DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
if (static_cast<const NVPTXTargetMachine &>(MF.getTarget())
.getPointerSize(ADDRESS_SPACE_LOCAL) == 8) {
O << "\t.reg .b64 \t%SP;\n"
<< "\t.reg .b64 \t%SPL;\n";
} else {
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,8 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
// mov %SPL, %depot;
// cvta.local %SP, %SPL;
// for local address accesses in MF.
bool Is64Bit =
static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit();
bool Is64Bit = static_cast<const NVPTXTargetMachine &>(MF.getTarget())
.getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8;
unsigned CvtaLocalOpcode =
(Is64Bit ? NVPTX::cvta_local_64 : NVPTX::cvta_local);
unsigned MovDepotOpcode =
Expand Down
5 changes: 1 addition & 4 deletions llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1803,10 +1803,7 @@ SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
{Chain, DAG.getZExtOrTrunc(Size, DL, LocalVT),
DAG.getTargetConstant(Align, DL, MVT::i32)});

SDValue ASC = DAG.getAddrSpaceCast(
DL, Op.getValueType(), Alloc, ADDRESS_SPACE_LOCAL, ADDRESS_SPACE_GENERIC);

return DAG.getMergeValues({ASC, SDValue(Alloc.getNode(), 1)}, DL);
return Alloc;
}

SDValue NVPTXTargetLowering::LowerSTACKRESTORE(SDValue Op,
Expand Down
124 changes: 54 additions & 70 deletions llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
//
//===----------------------------------------------------------------------===//
//
// For all alloca instructions, and add a pair of cast to local address for
// each of them. For example,
// Change the Module's DataLayout to have the local address space for alloca's.
// Change the address space of each alloca to local and add an addrspacecast to
// generic address space. For example,
//
// %A = alloca i32
// store i32 0, i32* %A ; emits st.u32
//
// will be transformed to
//
// %A = alloca i32
// %Local = addrspacecast i32* %A to i32 addrspace(5)*
// %A = alloca i32, addrspace(5)
// %Generic = addrspacecast i32 addrspace(5)* %A to i32*
// store i32 0, i32 addrspace(5)* %Generic ; emits st.local.u32
//
Expand All @@ -24,18 +24,24 @@
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "NVPTX.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"

using namespace llvm;
using namespace NVPTXAS;

namespace {
class NVPTXLowerAlloca : public FunctionPass {
bool runOnFunction(Function &F) override;
bool doInitialization(Module &M) override;

public:
static char ID; // Pass identification, replacement for typeid
Expand All @@ -58,77 +64,55 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
if (skipFunction(F))
return false;

bool Changed = false;
SmallVector<AllocaInst *, 16> Allocas;
for (auto &BB : F)
for (auto &I : BB) {
if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
Changed = true;
for (auto &I : BB)
if (auto *Alloca = dyn_cast<AllocaInst>(&I))
if (Alloca->getAddressSpace() != ADDRESS_SPACE_LOCAL)
Allocas.push_back(Alloca);

if (Allocas.empty())
return false;

PointerType *AllocInstPtrTy =
cast<PointerType>(allocaInst->getType()->getScalarType());
unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace();
assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC ||
AllocAddrSpace == ADDRESS_SPACE_LOCAL) &&
"AllocaInst can only be in Generic or Local address space for "
"NVPTX.");
for (AllocaInst *Alloca : Allocas) {
auto *NewAlloca = new AllocaInst(
Alloca->getAllocatedType(), ADDRESS_SPACE_LOCAL, Alloca->getArraySize(),
Alloca->getAlign(), Alloca->getName());
auto *Cast = new AddrSpaceCastInst(
NewAlloca,
PointerType::get(Alloca->getAllocatedType()->getContext(),
ADDRESS_SPACE_GENERIC),
"");
Cast->insertBefore(Alloca->getIterator());
NewAlloca->insertBefore(Cast->getIterator());
for (auto &U : llvm::make_early_inc_range(Alloca->uses())) {
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start &&
II->getIntrinsicID() != Intrinsic::lifetime_end))
continue;

Instruction *AllocaInLocalAS = allocaInst;
auto ETy = allocaInst->getAllocatedType();
IRBuilder<> Builder(II);
Builder.CreateIntrinsic(II->getIntrinsicID(), {NewAlloca->getType()},
{NewAlloca});
II->eraseFromParent();
}

// We need to make sure that LLVM has info that alloca needs to go to
// ADDRESS_SPACE_LOCAL for InferAddressSpace pass.
//
// For allocas in ADDRESS_SPACE_LOCAL, we add addrspacecast to
// ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that
// the alloca's users still use a generic pointer to operate on.
//
// For allocas already in ADDRESS_SPACE_LOCAL, we just need
// addrspacecast to ADDRESS_SPACE_GENERIC.
if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) {
auto ASCastToLocalAS = new AddrSpaceCastInst(
allocaInst,
PointerType::get(ETy->getContext(), ADDRESS_SPACE_LOCAL), "");
ASCastToLocalAS->insertAfter(allocaInst->getIterator());
AllocaInLocalAS = ASCastToLocalAS;
}
Alloca->replaceAllUsesWith(Cast);
Alloca->eraseFromParent();
}
return true;
}

auto AllocaInGenericAS = new AddrSpaceCastInst(
AllocaInLocalAS,
PointerType::get(ETy->getContext(), ADDRESS_SPACE_GENERIC), "");
AllocaInGenericAS->insertAfter(AllocaInLocalAS->getIterator());
bool NVPTXLowerAlloca::doInitialization(Module &M) {
const auto &DL = M.getDataLayout();
if (DL.getAllocaAddrSpace() == ADDRESS_SPACE_LOCAL)
return false;
auto DLStr = DL.getStringRepresentation();

for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
// addrspacecast to NVPTXInferAddressSpaces. For other types
// of instructions this is unnecessary and may introduce redundant
// address cast.
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
if (LI && LI->getPointerOperand() == allocaInst &&
!LI->isVolatile()) {
LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
if (SI && SI->getPointerOperand() == allocaInst &&
!SI->isVolatile()) {
SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
if (GI && GI->getPointerOperand() == allocaInst) {
GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS);
continue;
}
auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
if (BI && BI->getOperand(0) == allocaInst) {
BI->setOperand(0, AllocaInGenericAS);
continue;
}
}
}
}
return Changed;
auto AddrSpaceStr = "A" + std::to_string(ADDRESS_SPACE_LOCAL);
assert(!StringRef(DLStr).contains("A") && "DataLayout should not contain A");
M.setDataLayout(DLStr.empty() ? AddrSpaceStr : DLStr + "-" + AddrSpaceStr);
return true;
}

FunctionPass *llvm::createNVPTXLowerAllocaPass() {
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,8 @@ void copyByValParam(Function &F, Argument &Arg) {
// the use of the byval parameter with this alloca instruction.
AllocA->setAlignment(
Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType)));
Arg.replaceAllUsesWith(AllocA);
auto *AddressSpaceCast = IRB.CreateAddrSpaceCast(AllocA, Arg.getType(), Arg.getName());
Arg.replaceAllUsesWith(AddressSpaceCast);

CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg);

Expand Down
10 changes: 7 additions & 3 deletions llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,22 +119,26 @@ bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(FIOperandNum + 1).getImm();

// Using I0 as the frame pointer
MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
MI.getOperand(FIOperandNum).ChangeToRegister(getFrameLocalRegister(MF), false);
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
return false;
}

Register NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const NVPTXTargetMachine &TM =
static_cast<const NVPTXTargetMachine &>(MF.getTarget());
return TM.is64Bit() ? NVPTX::VRFrame64 : NVPTX::VRFrame32;
return TM.getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8
? NVPTX::VRFrame64
: NVPTX::VRFrame32;
}

Register
NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const {
const NVPTXTargetMachine &TM =
static_cast<const NVPTXTargetMachine &>(MF.getTarget());
return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32;
return TM.getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8
? NVPTX::VRFrameLocal64
: NVPTX::VRFrameLocal32;
}

void NVPTXRegisterInfo::clearDebugRegisterMap() const {
Expand Down
24 changes: 24 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,8 @@ void NVPTXPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOptLevel::None) {
addAddressSpaceInferencePasses();
addStraightLineScalarOptimizationPasses();
} else {
addPass(createNVPTXLowerAllocaPass());
}

addPass(createAtomicExpandLegacyPass());
Expand Down Expand Up @@ -502,3 +504,25 @@ void NVPTXPassConfig::addMachineSSAOptimization() {
addPass(&PeepholeOptimizerLegacyID);
printAndVerify("After codegen peephole optimization pass");
}

bool NVPTXTargetMachine::isCompatibleDataLayout(
    const DataLayout &Candidate) const {
  // A module is compatible either with the target's own DataLayout or with
  // that DataLayout extended with an alloca-address-space component, which
  // NVPTXLowerAlloca appends when it rewrites allocas into local memory.
  //
  // TODO: Consider enforcing that Candidate's alloca address space is
  // ADDRESS_SPACE_LOCAL rather than accepting any "A" component.
  if (DL == Candidate)
    return true;

  std::string DLStr = DL.getStringRepresentation();
  // Only append the alloca spec if the string does not already carry one.
  // NOTE(review): a bare substring check on "A" is fragile if other
  // specifications ever contain the letter — confirm against the DataLayout
  // grammar before extending this.
  if (!StringRef(DLStr).contains("A")) {
    const std::string AllocaSpec = "A" + std::to_string(ADDRESS_SPACE_LOCAL);
    DLStr = DLStr.empty() ? AllocaSpec : DLStr + "-" + AllocaSpec;
  }
  return DataLayout(DLStr) == Candidate;
}

/// Map a PseudoSourceValue kind to the NVPTX address space it lives in.
/// Fixed stack slots are placed in local memory; every other kind defers to
/// the generic CodeGenTargetMachineImpl mapping.
unsigned
NVPTXTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
  if (Kind == PseudoSourceValue::FixedStack)
    return ADDRESS_SPACE_LOCAL;
  return CodeGenTargetMachineImpl::getAddressSpaceForPseudoSourceKind(Kind);
}
3 changes: 3 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXTargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ class NVPTXTargetMachine : public CodeGenTargetMachineImpl {

std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value *V) const override;

bool isCompatibleDataLayout(const DataLayout &Candidate) const override;
unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override;
}; // NVPTXTargetMachine.

class NVPTXTargetMachine32 : public NVPTXTargetMachine {
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,10 @@ entry:

; CHECK: ld.param.b64 %rd[[A_REG:[0-9]+]], [kernel_func_param_0]
; CHECK: cvta.to.global.u64 %rd[[A1_REG:[0-9]+]], %rd[[A_REG]]
; CHECK: add.u64 %rd[[SP_REG:[0-9]+]], %SP, 0
; CHECK: add.u64 %rd[[SP_REG0:[0-9]+]], %SPL, 0
; CHECK: cvta.local.u64 %rd[[SP_REG:[0-9]+]], %rd[[SP_REG0]];
; CHECK: ld.global.b32 %r[[A0_REG:[0-9]+]], [%rd[[A1_REG]]]
; CHECK: st.local.b32 [{{%rd[0-9]+}}], %r[[A0_REG]]
; CHECK: st.local.b32 [%SPL], %r[[A0_REG]]

%0 = load float, ptr %a, align 4
store float %0, ptr %buf, align 4
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/NVPTX/dynamic-stackalloc-regression.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ define void @foo(i64 %a, ptr %p0, ptr %p1) {
; CHECK-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-NEXT: alloca.u64 %rd4, %rd3, 16;
; CHECK-NEXT: cvta.local.u64 %rd5, %rd4;
; CHECK-NEXT: ld.param.b64 %rd6, [foo_param_1];
; CHECK-NEXT: alloca.u64 %rd7, %rd3, 16;
; CHECK-NEXT: cvta.local.u64 %rd8, %rd7;
; CHECK-NEXT: ld.param.b64 %rd9, [foo_param_2];
; CHECK-NEXT: st.b64 [%rd6], %rd5;
; CHECK-NEXT: st.b64 [%rd9], %rd8;
; CHECK-NEXT: ld.param.b64 %rd5, [foo_param_1];
; CHECK-NEXT: cvta.local.u64 %rd6, %rd4;
; CHECK-NEXT: ld.param.b64 %rd7, [foo_param_2];
; CHECK-NEXT: alloca.u64 %rd8, %rd3, 16;
; CHECK-NEXT: cvta.local.u64 %rd9, %rd8;
; CHECK-NEXT: st.b64 [%rd5], %rd6;
; CHECK-NEXT: st.b64 [%rd7], %rd9;
; CHECK-NEXT: ret;
%b = alloca i8, i64 %a, align 16
%c = alloca i8, i64 %a, align 16
Expand Down
7 changes: 3 additions & 4 deletions llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,14 @@ define float @test_extract_i(<2 x float> %a, i64 %idx) #0 {
; CHECK-NOF32X2-EMPTY:
; CHECK-NOF32X2-NEXT: // %bb.0:
; CHECK-NOF32X2-NEXT: mov.b64 %SPL, __local_depot3;
; CHECK-NOF32X2-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-NOF32X2-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_extract_i_param_0];
; CHECK-NOF32X2-NEXT: ld.param.b64 %rd1, [test_extract_i_param_1];
; CHECK-NOF32X2-NEXT: st.v2.b32 [%SP], {%r1, %r2};
; CHECK-NOF32X2-NEXT: st.local.v2.b32 [%SPL], {%r1, %r2};
; CHECK-NOF32X2-NEXT: and.b64 %rd2, %rd1, 1;
; CHECK-NOF32X2-NEXT: shl.b64 %rd3, %rd2, 2;
; CHECK-NOF32X2-NEXT: add.u64 %rd4, %SP, 0;
; CHECK-NOF32X2-NEXT: add.u64 %rd4, %SPL, 0;
; CHECK-NOF32X2-NEXT: or.b64 %rd5, %rd4, %rd3;
; CHECK-NOF32X2-NEXT: ld.b32 %r3, [%rd5];
; CHECK-NOF32X2-NEXT: ld.local.b32 %r3, [%rd5];
; CHECK-NOF32X2-NEXT: st.param.b32 [func_retval0], %r3;
; CHECK-NOF32X2-NEXT: ret;
;
Expand Down
Loading
Loading