diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index 78db8413e62c9..c202f7fa93db6 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -284,7 +284,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, // Adjust stack pointer. int StackAdj = StackAdjust.getImm(); int MaxTCDelta = X86FI->getTCReturnAddrDelta(); - int Offset = 0; + int64_t Offset = 0; assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive"); // Incoporate the retaddr area. @@ -297,7 +297,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB, if (Offset) { // Check for possible merge with preceding ADD instruction. - Offset += X86FL->mergeSPUpdates(MBB, MBBI, true); + Offset = X86FL->mergeSPAdd(MBB, MBBI, Offset, true); X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true); } diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp index a15db039a5ed4..50c56c9dd08b3 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -223,6 +223,8 @@ flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) { return false; } +constexpr int64_t MaxSPChunk = (1LL << 31) - 1; + /// emitSPUpdate - Emit a series of instructions to increment / decrement the /// stack pointer by a constant value. void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, @@ -242,7 +244,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, return; } - uint64_t Chunk = (1LL << 31) - 1; + uint64_t Chunk = MaxSPChunk; MachineFunction &MF = *MBB.getParent(); const X86Subtarget &STI = MF.getSubtarget(); @@ -391,12 +393,15 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( return MI; } -int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MBBI, - bool doMergeWithPrevious) const { +template +int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + FoundT FoundStackAdjust, + CalcT CalcNewOffset, + bool doMergeWithPrevious) const { if ((doMergeWithPrevious && MBBI == MBB.begin()) || (!doMergeWithPrevious && MBBI == MBB.end())) - return 0; + return CalcNewOffset(0); MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI; @@ -415,27 +420,38 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction()) PI = std::prev(PI); - unsigned Opc = PI->getOpcode(); - int Offset = 0; - - if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) && - PI->getOperand(0).getReg() == StackPtr) { - assert(PI->getOperand(1).getReg() == StackPtr); - Offset = PI->getOperand(2).getImm(); - } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) && - PI->getOperand(0).getReg() == StackPtr && - PI->getOperand(1).getReg() == StackPtr && - PI->getOperand(2).getImm() == 1 && - PI->getOperand(3).getReg() == X86::NoRegister && - PI->getOperand(5).getReg() == X86::NoRegister) { - // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg. - Offset = PI->getOperand(4).getImm(); - } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) && - PI->getOperand(0).getReg() == StackPtr) { - assert(PI->getOperand(1).getReg() == StackPtr); - Offset = -PI->getOperand(2).getImm(); - } else - return 0; + int64_t Offset = 0; + for (;;) { + unsigned Opc = PI->getOpcode(); + + if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) && + PI->getOperand(0).getReg() == StackPtr) { + assert(PI->getOperand(1).getReg() == StackPtr); + Offset = PI->getOperand(2).getImm(); + } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) && + PI->getOperand(0).getReg() == StackPtr && + PI->getOperand(1).getReg() == StackPtr && + PI->getOperand(2).getImm() == 1 && + PI->getOperand(3).getReg() == X86::NoRegister && + PI->getOperand(5).getReg() == X86::NoRegister) { + // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg. + Offset = PI->getOperand(4).getImm(); + } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) && + PI->getOperand(0).getReg() == StackPtr) { + assert(PI->getOperand(1).getReg() == StackPtr); + Offset = -PI->getOperand(2).getImm(); + } else + return CalcNewOffset(0); + + FoundStackAdjust(PI, Offset); + if (std::abs((int64_t)CalcNewOffset(Offset)) < MaxSPChunk) + break; + + if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end())) + return CalcNewOffset(0); + + PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI); + } PI = MBB.erase(PI); if (PI != MBB.end() && PI->isCFIInstruction()) { @@ -448,7 +464,16 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, if (!doMergeWithPrevious) MBBI = skipDebugInstructionsForward(PI, MBB.end()); - return Offset; + return CalcNewOffset(Offset); +} + +int64_t X86FrameLowering::mergeSPAdd(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + int64_t AddOffset, + bool doMergeWithPrevious) const { + return mergeSPUpdates( + MBB, MBBI, [AddOffset](int64_t Offset) { return AddOffset + Offset; }, + doMergeWithPrevious); } void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, @@ -1975,8 +2000,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, // If there is an SUB32ri of ESP immediately before this instruction, merge // the two. This can be the case when tail call elimination is enabled and - // the callee has more arguments then the caller. - NumBytes -= mergeSPUpdates(MBB, MBBI, true); + // the callee has more arguments than the caller. + NumBytes = mergeSPUpdates( + MBB, MBBI, [NumBytes](int64_t Offset) { return NumBytes - Offset; }, + true); // Adjust stack pointer: ESP -= numbytes. @@ -2457,7 +2484,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (HasFP) { if (X86FI->hasSwiftAsyncContext()) { // Discard the context. - int Offset = 16 + mergeSPUpdates(MBB, MBBI, true); + int64_t Offset = mergeSPAdd(MBB, MBBI, 16, true); emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true); } // Pop EBP. @@ -2531,7 +2558,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, // If there is an ADD32ri or SUB32ri of ESP immediately before this // instruction, merge the two instructions. if (NumBytes || MFI.hasVarSizedObjects()) - NumBytes += mergeSPUpdates(MBB, MBBI, true); + NumBytes = mergeSPAdd(MBB, MBBI, NumBytes, true); // If dynamic alloca is used, then reset esp to point to the last callee-saved // slot before popping them off! Same applies for the case, when stack was @@ -2612,11 +2639,11 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) { // Add the return addr area delta back since we are not tail calling. - int Offset = -1 * X86FI->getTCReturnAddrDelta(); + int64_t Offset = -1 * X86FI->getTCReturnAddrDelta(); assert(Offset >= 0 && "TCDelta should never be positive"); if (Offset) { // Check for possible merge with preceding ADD instruction. - Offset += mergeSPUpdates(MBB, Terminator, true); + Offset = mergeSPAdd(MBB, Terminator, Offset, true); emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true); } } @@ -3814,13 +3841,24 @@ MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr( // Add Amount to SP to destroy a frame, or subtract to setup. int64_t StackAdjustment = isDestroy ? Amount : -Amount; + int64_t CfaAdjustment = StackAdjustment; if (StackAdjustment) { // Merge with any previous or following adjustment instruction. Note: the // instructions merged with here do not have CFI, so their stack - // adjustments do not feed into CfaAdjustment. - StackAdjustment += mergeSPUpdates(MBB, InsertPos, true); - StackAdjustment += mergeSPUpdates(MBB, InsertPos, false); + // adjustments do not feed into CfaAdjustment + + auto CalcCfaAdjust = [&CfaAdjustment](MachineBasicBlock::iterator PI, + int64_t Offset) { + CfaAdjustment += Offset; + }; + auto CalcNewOffset = [&StackAdjustment](int64_t Offset) { + return StackAdjustment + Offset; + }; + StackAdjustment = + mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, true); + StackAdjustment = + mergeSPUpdates(MBB, InsertPos, CalcCfaAdjust, CalcNewOffset, false); if (StackAdjustment) { if (!(F.hasMinSize() && @@ -3830,7 +3868,7 @@ MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr( } } - if (DwarfCFI && !hasFP(MF)) { + if (DwarfCFI && !hasFP(MF) && CfaAdjustment) { // If we don't have FP, but need to generate unwind information, // we need to set the correct CFA offset after the stack adjustment. // How much we adjust the CFA offset depends on whether we're emitting @@ -3838,14 +3876,11 @@ MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr( // offset to be correct at each call site, while for debugging we want // it to be more precise. - int64_t CfaAdjustment = -StackAdjustment; // TODO: When not using precise CFA, we also need to adjust for the // InternalAmt here. - if (CfaAdjustment) { - BuildCFI( - MBB, InsertPos, DL, - MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment)); - } + BuildCFI( + MBB, InsertPos, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, -CfaAdjustment)); } return I; diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h index 02fe8ee02a7e4..ef41b4653becc 100644 --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -134,12 +134,50 @@ class X86FrameLowering : public TargetFrameLowering { processFunctionBeforeFrameIndicesReplaced(MachineFunction &MF, RegScavenger *RS) const override; - /// Check the instruction before/after the passed instruction. If - /// it is an ADD/SUB/LEA instruction it is deleted argument and the - /// stack adjustment is returned as a positive value for ADD/LEA and - /// a negative for SUB. - int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - bool doMergeWithPrevious) const; +private: + /// Basic Pseudocode: + /// if (instruction before/after the passed instruction is ADD/SUB/LEA) + /// Offset = instruction stack adjustment + /// ... positive value for ADD/LEA and negative for SUB + /// FoundStackAdjust(instruction, Offset) + /// erase(instruction) + /// return CalcNewOffset(Offset) + /// else + /// return CalcNewOffset(0) + /// + /// It's possible that the selected instruction is not immediately + /// before/after MBBI for large adjustments that have been split into multiple + /// instructions. + /// + /// FoundStackAdjust should have the signature: + /// void FoundStackAdjust(MachineBasicBlock::iterator PI, int64_t Offset) + /// CalcNewOffset should have the signature: + /// int64_t CalcNewOffset(int64_t Offset) + template + int64_t mergeSPUpdates(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + FoundT FoundStackAdjust, CalcT CalcNewOffset, + bool doMergeWithPrevious) const; + + template + int64_t mergeSPUpdates(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, CalcT CalcNewOffset, + bool doMergeWithPrevious) const { + auto FoundStackAdjust = [](MachineBasicBlock::iterator MBBI, + int64_t Offset) {}; + return mergeSPUpdates(MBB, MBBI, FoundStackAdjust, CalcNewOffset, + doMergeWithPrevious); + } + +public: + /// Equivalent to: + /// mergeSPUpdates(MBB, MBBI, + /// [AddOffset](int64_t Offset) { + /// return AddOffset + Offset; + /// }, + /// doMergeWithPrevious); + int64_t mergeSPAdd(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, + int64_t AddOffset, bool doMergeWithPrevious) const; /// Emit a series of instructions to increment / decrement the stack /// pointer by a constant value. diff --git a/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll new file mode 100644 index 0000000000000..b26345e2d5bbc --- /dev/null +++ b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -mtriple=x86_64-linux-unknown -verify-machineinstrs -o %t.s +; RUN: FileCheck --input-file=%t.s %s + +; Double-check that we are able to assemble the generated '.s'. A symptom of the +; problem that led to this test is an assembler failure when using +; '-save-temps'. For example: +; +; > ...s:683:7: error: invalid operand for instruction +; > addq $2147483679, %rsp # imm = 0x8000001F +; +; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s + +; Check that the stack update after calling bar gets merged into the second add +; and not the first which is already at the chunk size limit (0x7FFFFFFF). + +define void @foo(ptr %rhs) { +; CHECK-LABEL: foo +entry: + %lhs = alloca [5 x [5 x [3 x [162 x [161 x [161 x double]]]]]], align 16 + store ptr %lhs, ptr %rhs, align 8 + %0 = call i32 @baz() + call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs) +; CHECK: call{{.*}}bar +; CHECK: addq{{.*}}$2147483647, %rsp +; CHECK: addq{{.*}}$372037585, %rsp +; CHECK: .cfi_adjust_cfa_offset -2519521232 + ret void +} + +declare void @bar(i64, i64, i64, i64, i64, ptr, ptr, ptr, ptr) + +declare i32 @baz()