diff --git a/llvm/lib/Target/NVPTX/NVPTX.h b/llvm/lib/Target/NVPTX/NVPTX.h
index b7c5a0a5c9983..b7fd7090299a9 100644
--- a/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/llvm/lib/Target/NVPTX/NVPTX.h
@@ -76,6 +76,7 @@ void initializeNVPTXAAWrapperPassPass(PassRegistry &);
 void initializeNVPTXExternalAAWrapperPass(PassRegistry &);
 void initializeNVPTXPeepholePass(PassRegistry &);
 void initializeNVPTXTagInvariantLoadLegacyPassPass(PassRegistry &);
+void initializeNVPTXPrologEpilogPassPass(PassRegistry &);
 
 struct NVVMIntrRangePass : PassInfoMixin<NVVMIntrRangePass> {
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
diff --git a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 7929bd2e0df08..ababc66cc5d26 100644
--- a/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -41,7 +41,7 @@ class NVPTXPrologEpilogPass : public MachineFunctionPass {
 private:
   void calculateFrameObjectOffsets(MachineFunction &Fn);
 };
-}
+} // end anonymous namespace
 
 MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
   return new NVPTXPrologEpilogPass();
@@ -49,6 +49,54 @@ MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
 
 char NVPTXPrologEpilogPass::ID = 0;
 
+INITIALIZE_PASS(NVPTXPrologEpilogPass, DEBUG_TYPE,
+                "NVPTX Prologue/Epilogue Insertion", false, false)
+
+/// Rewrite the frame-index operand \p OpIdx of \p MI if \p MI is a DBG_VALUE*
+/// instruction.
+///
+/// Frame indices in debug values are encoded in a target independent way with
+/// simply the frame index and offset rather than any target-specific
+/// addressing mode. The operand is replaced with the frame register returned
+/// by TargetFrameLowering::getFrameIndexReference and the corresponding offset
+/// is folded into the instruction's DIExpression.
+///
+/// \returns true if \p MI was a debug value and has been rewritten, false if
+/// \p MI is not a debug value (in which case it is left untouched).
+static bool replaceFrameIndexDebugInstr(MachineFunction &MF, MachineInstr &MI,
+                                        unsigned OpIdx) {
+  if (!MI.isDebugValue())
+    return false;
+
+  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
+  const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+
+  MachineOperand &Op = MI.getOperand(OpIdx);
+  assert(MI.isDebugOperand(&Op) &&
+         "Frame indices can only appear as a debug operand in a DBG_VALUE*"
+         " machine instruction");
+  Register Reg;
+  const int FrameIdx = Op.getIndex();
+  const StackOffset Offset = TFI->getFrameIndexReference(MF, FrameIdx, Reg);
+  Op.ChangeToRegister(Reg, /*isDef=*/false);
+
+  const DIExpression *DIExpr = MI.getDebugExpression();
+  if (MI.isNonListDebugValue()) {
+    DIExpr = TRI.prependOffsetExpression(DIExpr, DIExpression::ApplyOffset,
+                                         Offset);
+  } else {
+    // The debug operand at DebugOpIndex was a frame index at offset
+    // `Offset`; now the operand has been replaced with the frame
+    // register, we must add Offset with `register x, plus Offset`.
+    const unsigned DebugOpIndex = MI.getDebugOperandIndex(&Op);
+    SmallVector<uint64_t, 3> Ops;
+    TRI.getOffsetOpcodes(Offset, Ops);
+    DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, DebugOpIndex);
+  }
+  MI.getDebugExpressionOp().setMetadata(DIExpr);
+  return true;
+}
+
 bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
   const TargetSubtargetInfo &STI = MF.getSubtarget();
   const TargetFrameLowering &TFI = *STI.getFrameLowering();
@@ -57,41 +105,27 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
 
   calculateFrameObjectOffsets(MF);
 
-  for (MachineBasicBlock &MBB : MF) {
-    for (MachineInstr &MI : MBB) {
-      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
-        if (!MI.getOperand(i).isFI())
+  for (MachineBasicBlock &BB : MF) {
+    for (MachineBasicBlock::iterator I = BB.end(); I != BB.begin();) {
+      MachineInstr &MI = *std::prev(I);
+
+      bool RemovedMI = false;
+      for (const auto &[Idx, Op] : enumerate(MI.operands())) {
+        if (!Op.isFI())
           continue;
 
-        // Frame indices in debug values are encoded in a target independent
-        // way with simply the frame index and offset rather than any
-        // target-specific addressing mode.
-        if (MI.isDebugValue()) {
-          MachineOperand &Op = MI.getOperand(i);
-          assert(
-              MI.isDebugOperand(&Op) &&
-              "Frame indices can only appear as a debug operand in a DBG_VALUE*"
-              " machine instruction");
-          Register Reg;
-          auto Offset =
-              TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
-          Op.ChangeToRegister(Reg, /*isDef=*/false);
-          const DIExpression *DIExpr = MI.getDebugExpression();
-          if (MI.isNonListDebugValue()) {
-            DIExpr = TRI.prependOffsetExpression(MI.getDebugExpression(), DIExpression::ApplyOffset, Offset);
-          } else {
-            SmallVector<uint64_t, 3> Ops;
-            TRI.getOffsetOpcodes(Offset, Ops);
-            unsigned OpIdx = MI.getDebugOperandIndex(&Op);
-            DIExpr = DIExpression::appendOpsToArg(DIExpr, Ops, OpIdx);
-          }
-          MI.getDebugExpressionOp().setMetadata(DIExpr);
+        if (replaceFrameIndexDebugInstr(MF, MI, Idx))
           continue;
-        }
 
-        TRI.eliminateFrameIndex(MI, 0, i, nullptr);
+        // Eliminate this FrameIndex operand.
+        RemovedMI = TRI.eliminateFrameIndex(MI, 0, Idx, nullptr);
         Modified = true;
+        if (RemovedMI)
+          break;
       }
+
+      if (!RemovedMI)
+        --I;
     }
   }
 
diff --git a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index aa07d510b3a12..646b554878c70 100644
--- a/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -103,15 +103,23 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 
 bool NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
-                                            RegScavenger *RS) const {
+                                            RegScavenger *) const {
   assert(SPAdj == 0 && "Unexpected");
 
   MachineInstr &MI = *II;
-  int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+  // Lifetime markers are normally removed by earlier passes but can survive
+  // to this point (e.g. under -opt-bisect-limit). Erase such an instruction
+  // instead of rewriting it, and return true so the caller knows it is gone.
+  if (MI.isLifetimeMarker()) {
+    MI.eraseFromParent();
+    return true;
+  }
+
+  const int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
 
-  MachineFunction &MF = *MI.getParent()->getParent();
-  int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
-               MI.getOperand(FIOperandNum + 1).getImm();
+  const MachineFunction &MF = *MI.getParent()->getParent();
+  const int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
+                     MI.getOperand(FIOperandNum + 1).getImm();
 
   // Using I0 as the frame pointer
   MI.getOperand(FIOperandNum).ChangeToRegister(getFrameRegister(MF), false);
diff --git a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index ef310e5828f22..0603994606d71 100644
--- a/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -115,6 +115,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
   initializeNVPTXExternalAAWrapperPass(PR);
   initializeNVPTXPeepholePass(PR);
   initializeNVPTXTagInvariantLoadLegacyPassPass(PR);
+  initializeNVPTXPrologEpilogPassPass(PR);
 }
 
 static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
diff --git a/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll b/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
new file mode 100644
index 0000000000000..42655538cc7ad
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/frameindex-lifetime.ll
@@ -0,0 +1,51 @@
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+; RUN: llc -mtriple=nvptx64-nvidia-cuda -opt-bisect-limit=%(line-1) -O3 %s -o %t
+
+;; This test is intended to verify that we don't crash when -opt-bisect-limit
+;; is used in conjunction with lifetime markers. Previously, later passes
+;; would not handle these instructions correctly and relied on earlier passes
+;; to remove them.
+
+declare void @bar(ptr)
+
+define void @foo() {
+  %p = alloca i32
+  call void @llvm.lifetime.start(i64 4, ptr %p)
+  call void @bar(ptr %p)
+  call void @llvm.lifetime.end(i64 4, ptr %p)
+  ret void
+}