Skip to content

Commit 095073e

Browse files
committed
[NVPTX] Change the alloca address space in NVPTXLowerAlloca
This patch refactors NVPTXLowerAlloca to produce simpler IR for allocas. Previously, the implementation attached a pair of consecutive address space casts to each alloca: one from addrspace(0) (generic) to addrspace(5) (local), and another immediately back to addrspace(0). Downstream passes needed to recognize this idiom to generate efficient PTX. With this patch, NVPTXLowerAlloca directly changes the address space of each alloca to "local" and inserts a single addrspacecast from local back to generic. The InferAddressSpaces pass can then remove the remaining cast. This change results in fewer address-space-conversion (cvta) instructions in the final PTX.
1 parent 11994e8 commit 095073e

22 files changed

+342
-357
lines changed

llvm/include/llvm/Target/TargetMachine.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ class LLVM_ABI TargetMachine {
208208
/// The LLVM Module owns a DataLayout that is used for the target independent
209209
/// optimizations and code generation. This hook provides a target specific
210210
/// check on the validity of this DataLayout.
211-
bool isCompatibleDataLayout(const DataLayout &Candidate) const {
211+
virtual bool isCompatibleDataLayout(const DataLayout &Candidate) const {
212212
return DL == Candidate;
213213
}
214214

llvm/lib/IR/Verifier.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@
114114
#include "llvm/Pass.h"
115115
#include "llvm/ProfileData/InstrProf.h"
116116
#include "llvm/Support/AMDGPUAddrSpace.h"
117+
#include "llvm/Support/NVPTXAddrSpace.h"
117118
#include "llvm/Support/AtomicOrdering.h"
118119
#include "llvm/Support/Casting.h"
119120
#include "llvm/Support/CommandLine.h"
@@ -4498,6 +4499,13 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
44984499
"alloca on amdgpu must be in addrspace(5)", &AI);
44994500
}
45004501

4502+
if (TT.isNVPTX()) {
4503+
Check(AI.getAddressSpace() == NVPTXAS::ADDRESS_SPACE_LOCAL ||
4504+
AI.getAddressSpace() == NVPTXAS::ADDRESS_SPACE_GENERIC,
4505+
"AllocaInst can only be in Generic or Local address space for NVPTX.",
4506+
&AI);
4507+
}
4508+
45014509
visitInstruction(AI);
45024510
}
45034511

llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1484,7 +1484,8 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
14841484
if (NumBytes) {
14851485
O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t"
14861486
<< DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n";
1487-
if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
1487+
if (static_cast<const NVPTXTargetMachine &>(MF.getTarget())
1488+
.getPointerSize(ADDRESS_SPACE_LOCAL) == 8) {
14881489
O << "\t.reg .b64 \t%SP;\n"
14891490
<< "\t.reg .b64 \t%SPL;\n";
14901491
} else {

llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,8 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
4848
// mov %SPL, %depot;
4949
// cvta.local %SP, %SPL;
5050
// for local address accesses in MF.
51-
bool Is64Bit =
52-
static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit();
51+
bool Is64Bit = static_cast<const NVPTXTargetMachine &>(MF.getTarget())
52+
.getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8;
5353
unsigned CvtaLocalOpcode =
5454
(Is64Bit ? NVPTX::cvta_local_64 : NVPTX::cvta_local);
5555
unsigned MovDepotOpcode =

llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1803,10 +1803,7 @@ SDValue NVPTXTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
18031803
{Chain, DAG.getZExtOrTrunc(Size, DL, LocalVT),
18041804
DAG.getTargetConstant(Align, DL, MVT::i32)});
18051805

1806-
SDValue ASC = DAG.getAddrSpaceCast(
1807-
DL, Op.getValueType(), Alloc, ADDRESS_SPACE_LOCAL, ADDRESS_SPACE_GENERIC);
1808-
1809-
return DAG.getMergeValues({ASC, SDValue(Alloc.getNode(), 1)}, DL);
1806+
return Alloc;
18101807
}
18111808

18121809
SDValue NVPTXTargetLowering::LowerSTACKRESTORE(SDValue Op,

llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp

Lines changed: 54 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,16 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// For all alloca instructions, and add a pair of cast to local address for
10-
// each of them. For example,
9+
// Change the Module's DataLayout to use the local address space for allocas.
10+
// Change the address space of each alloca to local and add an addrspacecast to
11+
// generic address space. For example,
1112
//
1213
// %A = alloca i32
1314
// store i32 0, i32* %A ; emits st.u32
1415
//
1516
// will be transformed to
1617
//
17-
// %A = alloca i32
18-
// %Local = addrspacecast i32* %A to i32 addrspace(5)*
18+
// %A = alloca i32, addrspace(5)
1919
// %Generic = addrspacecast i32 addrspace(5)* %A to i32*
2020
// store i32 0, i32* %Generic ; emits st.local.u32
2121
//
@@ -24,18 +24,24 @@
2424
//
2525
//===----------------------------------------------------------------------===//
2626

27-
#include "MCTargetDesc/NVPTXBaseInfo.h"
27+
#include "llvm/Support/NVPTXAddrSpace.h"
2828
#include "NVPTX.h"
29+
#include "llvm/ADT/SmallVector.h"
2930
#include "llvm/IR/Function.h"
31+
#include "llvm/IR/IRBuilder.h"
3032
#include "llvm/IR/Instructions.h"
33+
#include "llvm/IR/IntrinsicInst.h"
34+
#include "llvm/IR/Module.h"
3135
#include "llvm/IR/Type.h"
3236
#include "llvm/Pass.h"
3337

3438
using namespace llvm;
39+
using namespace NVPTXAS;
3540

3641
namespace {
3742
class NVPTXLowerAlloca : public FunctionPass {
3843
bool runOnFunction(Function &F) override;
44+
bool doInitialization(Module &M) override;
3945

4046
public:
4147
static char ID; // Pass identification, replacement for typeid
@@ -58,77 +64,55 @@ bool NVPTXLowerAlloca::runOnFunction(Function &F) {
5864
if (skipFunction(F))
5965
return false;
6066

61-
bool Changed = false;
67+
SmallVector<AllocaInst *, 16> Allocas;
6268
for (auto &BB : F)
63-
for (auto &I : BB) {
64-
if (auto allocaInst = dyn_cast<AllocaInst>(&I)) {
65-
Changed = true;
69+
for (auto &I : BB)
70+
if (auto *Alloca = dyn_cast<AllocaInst>(&I))
71+
if (Alloca->getAddressSpace() != ADDRESS_SPACE_LOCAL)
72+
Allocas.push_back(Alloca);
73+
74+
if (Allocas.empty())
75+
return false;
6676

67-
PointerType *AllocInstPtrTy =
68-
cast<PointerType>(allocaInst->getType()->getScalarType());
69-
unsigned AllocAddrSpace = AllocInstPtrTy->getAddressSpace();
70-
assert((AllocAddrSpace == ADDRESS_SPACE_GENERIC ||
71-
AllocAddrSpace == ADDRESS_SPACE_LOCAL) &&
72-
"AllocaInst can only be in Generic or Local address space for "
73-
"NVPTX.");
77+
for (AllocaInst *Alloca : Allocas) {
78+
auto *NewAlloca = new AllocaInst(
79+
Alloca->getAllocatedType(), ADDRESS_SPACE_LOCAL, Alloca->getArraySize(),
80+
Alloca->getAlign(), Alloca->getName());
81+
auto *Cast = new AddrSpaceCastInst(
82+
NewAlloca,
83+
PointerType::get(Alloca->getAllocatedType()->getContext(),
84+
ADDRESS_SPACE_GENERIC),
85+
"");
86+
Cast->insertBefore(Alloca->getIterator());
87+
NewAlloca->insertBefore(Cast->getIterator());
88+
for (auto &U : llvm::make_early_inc_range(Alloca->uses())) {
89+
auto *II = dyn_cast<IntrinsicInst>(U.getUser());
90+
if (!II || (II->getIntrinsicID() != Intrinsic::lifetime_start &&
91+
II->getIntrinsicID() != Intrinsic::lifetime_end))
92+
continue;
7493

75-
Instruction *AllocaInLocalAS = allocaInst;
76-
auto ETy = allocaInst->getAllocatedType();
94+
IRBuilder<> Builder(II);
95+
Builder.CreateIntrinsic(II->getIntrinsicID(), {NewAlloca->getType()},
96+
{NewAlloca});
97+
II->eraseFromParent();
98+
}
7799

78-
// We need to make sure that LLVM has info that alloca needs to go to
79-
// ADDRESS_SPACE_LOCAL for InferAddressSpace pass.
80-
//
81-
// For allocas in ADDRESS_SPACE_LOCAL, we add addrspacecast to
82-
// ADDRESS_SPACE_LOCAL and back to ADDRESS_SPACE_GENERIC, so that
83-
// the alloca's users still use a generic pointer to operate on.
84-
//
85-
// For allocas already in ADDRESS_SPACE_LOCAL, we just need
86-
// addrspacecast to ADDRESS_SPACE_GENERIC.
87-
if (AllocAddrSpace == ADDRESS_SPACE_GENERIC) {
88-
auto ASCastToLocalAS = new AddrSpaceCastInst(
89-
allocaInst,
90-
PointerType::get(ETy->getContext(), ADDRESS_SPACE_LOCAL), "");
91-
ASCastToLocalAS->insertAfter(allocaInst->getIterator());
92-
AllocaInLocalAS = ASCastToLocalAS;
93-
}
100+
Alloca->replaceAllUsesWith(Cast);
101+
Alloca->eraseFromParent();
102+
}
103+
return true;
104+
}
94105

95-
auto AllocaInGenericAS = new AddrSpaceCastInst(
96-
AllocaInLocalAS,
97-
PointerType::get(ETy->getContext(), ADDRESS_SPACE_GENERIC), "");
98-
AllocaInGenericAS->insertAfter(AllocaInLocalAS->getIterator());
106+
bool NVPTXLowerAlloca::doInitialization(Module &M) {
107+
const auto &DL = M.getDataLayout();
108+
if (DL.getAllocaAddrSpace() == ADDRESS_SPACE_LOCAL)
109+
return false;
110+
auto DLStr = DL.getStringRepresentation();
99111

100-
for (Use &AllocaUse : llvm::make_early_inc_range(allocaInst->uses())) {
101-
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
102-
// use the converted generic address, in order to expose non-generic
103-
// addrspacecast to NVPTXInferAddressSpaces. For other types
104-
// of instructions this is unnecessary and may introduce redundant
105-
// address cast.
106-
auto LI = dyn_cast<LoadInst>(AllocaUse.getUser());
107-
if (LI && LI->getPointerOperand() == allocaInst &&
108-
!LI->isVolatile()) {
109-
LI->setOperand(LI->getPointerOperandIndex(), AllocaInGenericAS);
110-
continue;
111-
}
112-
auto SI = dyn_cast<StoreInst>(AllocaUse.getUser());
113-
if (SI && SI->getPointerOperand() == allocaInst &&
114-
!SI->isVolatile()) {
115-
SI->setOperand(SI->getPointerOperandIndex(), AllocaInGenericAS);
116-
continue;
117-
}
118-
auto GI = dyn_cast<GetElementPtrInst>(AllocaUse.getUser());
119-
if (GI && GI->getPointerOperand() == allocaInst) {
120-
GI->setOperand(GI->getPointerOperandIndex(), AllocaInGenericAS);
121-
continue;
122-
}
123-
auto BI = dyn_cast<BitCastInst>(AllocaUse.getUser());
124-
if (BI && BI->getOperand(0) == allocaInst) {
125-
BI->setOperand(0, AllocaInGenericAS);
126-
continue;
127-
}
128-
}
129-
}
130-
}
131-
return Changed;
112+
auto AddrSpaceStr = "A" + std::to_string(ADDRESS_SPACE_LOCAL);
113+
assert(!StringRef(DLStr).contains("A") && "DataLayout should not contain A");
114+
M.setDataLayout(DLStr.empty() ? AddrSpaceStr : DLStr + "-" + AddrSpaceStr);
115+
return true;
132116
}
133117

134118
FunctionPass *llvm::createNVPTXLowerAllocaPass() {

llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -529,7 +529,8 @@ void copyByValParam(Function &F, Argument &Arg) {
529529
// the use of the byval parameter with this alloca instruction.
530530
AllocA->setAlignment(
531531
Arg.getParamAlign().value_or(DL.getPrefTypeAlign(StructType)));
532-
Arg.replaceAllUsesWith(AllocA);
532+
auto *AddressSpaceCast = IRB.CreateAddrSpaceCast(AllocA, Arg.getType(), Arg.getName());
533+
Arg.replaceAllUsesWith(AddressSpaceCast);
533534

534535
CallInst *ArgInParam = createNVVMInternalAddrspaceWrap(IRB, Arg);
535536

llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -119,22 +119,26 @@ bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
119119
MI.getOperand(FIOperandNum + 1).getImm();
120120

121121
// Using I0 as the frame pointer
122-
MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
122+
MI.getOperand(FIOperandNum).ChangeToRegister(getFrameLocalRegister(MF), false);
123123
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
124124
return false;
125125
}
126126

127127
Register NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
128128
const NVPTXTargetMachine &TM =
129129
static_cast<const NVPTXTargetMachine &>(MF.getTarget());
130-
return TM.is64Bit() ? NVPTX::VRFrame64 : NVPTX::VRFrame32;
130+
return TM.getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8
131+
? NVPTX::VRFrame64
132+
: NVPTX::VRFrame32;
131133
}
132134

133135
Register
134136
NVPTXRegisterInfo::getFrameLocalRegister(const MachineFunction &MF) const {
135137
const NVPTXTargetMachine &TM =
136138
static_cast<const NVPTXTargetMachine &>(MF.getTarget());
137-
return TM.is64Bit() ? NVPTX::VRFrameLocal64 : NVPTX::VRFrameLocal32;
139+
return TM.getPointerSize(NVPTXAS::ADDRESS_SPACE_LOCAL) == 8
140+
? NVPTX::VRFrameLocal64
141+
: NVPTX::VRFrameLocal32;
138142
}
139143

140144
void NVPTXRegisterInfo::clearDebugRegisterMap() const {

llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,8 @@ void NVPTXPassConfig::addIRPasses() {
371371
if (getOptLevel() != CodeGenOptLevel::None) {
372372
addAddressSpaceInferencePasses();
373373
addStraightLineScalarOptimizationPasses();
374+
} else {
375+
addPass(createNVPTXLowerAllocaPass());
374376
}
375377

376378
addPass(createAtomicExpandLegacyPass());
@@ -502,3 +504,25 @@ void NVPTXPassConfig::addMachineSSAOptimization() {
502504
addPass(&PeepholeOptimizerLegacyID);
503505
printAndVerify("After codegen peephole optimization pass");
504506
}
507+
508+
bool NVPTXTargetMachine::isCompatibleDataLayout(
509+
const DataLayout &Candidate) const {
510+
// XXX: Should we enforce that the Candidate DataLayout has the same address space for allocas?
511+
if (DL == Candidate)
512+
return true;
513+
514+
auto DLStr = DL.getStringRepresentation();
515+
if (!StringRef(DLStr).contains("A"))
516+
DLStr = DLStr.empty() ? "A" + std::to_string(ADDRESS_SPACE_LOCAL)
517+
: DLStr + "-A" + std::to_string(ADDRESS_SPACE_LOCAL);
518+
auto NewDL = DataLayout(DLStr);
519+
520+
return NewDL == Candidate;
521+
}
522+
523+
unsigned NVPTXTargetMachine::getAddressSpaceForPseudoSourceKind(unsigned Kind) const {
524+
if (Kind == PseudoSourceValue::FixedStack) {
525+
return ADDRESS_SPACE_LOCAL;
526+
}
527+
return CodeGenTargetMachineImpl::getAddressSpaceForPseudoSourceKind(Kind);
528+
}

llvm/lib/Target/NVPTX/NVPTXTargetMachine.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,9 @@ class NVPTXTargetMachine : public CodeGenTargetMachineImpl {
7676

7777
std::pair<const Value *, unsigned>
7878
getPredicatedAddrSpace(const Value *V) const override;
79+
80+
bool isCompatibleDataLayout(const DataLayout &Candidate) const override;
81+
unsigned getAddressSpaceForPseudoSourceKind(unsigned Kind) const override;
7982
}; // NVPTXTargetMachine.
8083

8184
class NVPTXTargetMachine32 : public NVPTXTargetMachine {

0 commit comments

Comments
 (0)