@@ -580,25 +580,124 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
580580 Comment.str ());
581581}
582582
583+ // Allocate stack space and probe it if necessary.
583584void RISCVFrameLowering::allocateStack (MachineBasicBlock &MBB,
584585 MachineBasicBlock::iterator MBBI,
585- MachineFunction &MF, StackOffset Offset,
586- uint64_t RealStackSize,
587- bool EmitCFI) const {
586+ MachineFunction &MF, uint64_t Offset,
587+ uint64_t RealStackSize, bool EmitCFI,
588+ bool NeedProbe,
589+ uint64_t ProbeSize) const {
588590 DebugLoc DL;
589591 const RISCVRegisterInfo *RI = STI.getRegisterInfo ();
590592 const RISCVInstrInfo *TII = STI.getInstrInfo ();
591593
592- RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg, Offset, MachineInstr::FrameSetup,
594+ // Simply allocate the stack if it's not big enough to require a probe.
595+ if (!NeedProbe || Offset <= ProbeSize) {
596+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed (-Offset),
597+ MachineInstr::FrameSetup, getStackAlign ());
598+
599+ if (EmitCFI) {
600+ // Emit ".cfi_def_cfa_offset RealStackSize"
601+ unsigned CFIIndex = MF.addFrameInst (
602+ MCCFIInstruction::cfiDefCfaOffset (nullptr , RealStackSize));
603+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
604+ .addCFIIndex (CFIIndex)
605+ .setMIFlag (MachineInstr::FrameSetup);
606+ }
607+
608+ return ;
609+ }
610+
611+ // Unroll the probe loop depending on the number of iterations.
612+ if (Offset < ProbeSize * 5 ) {
613+ uint64_t CurrentOffset = 0 ;
614+ bool IsRV64 = STI.is64Bit ();
615+ while (CurrentOffset + ProbeSize <= Offset) {
616+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
617+ StackOffset::getFixed (-ProbeSize), MachineInstr::FrameSetup,
618+ getStackAlign ());
619+ // s[d|w] zero, 0(sp)
620+ BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
621+ .addReg (RISCV::X0)
622+ .addReg (SPReg)
623+ .addImm (0 )
624+ .setMIFlags (MachineInstr::FrameSetup);
625+
626+ CurrentOffset += ProbeSize;
627+ if (EmitCFI) {
628+ // Emit ".cfi_def_cfa_offset CurrentOffset"
629+ unsigned CFIIndex = MF.addFrameInst (
630+ MCCFIInstruction::cfiDefCfaOffset (nullptr , CurrentOffset));
631+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
632+ .addCFIIndex (CFIIndex)
633+ .setMIFlag (MachineInstr::FrameSetup);
634+ }
635+ }
636+
637+ uint64_t Residual = Offset - CurrentOffset;
638+ if (Residual) {
639+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
640+ StackOffset::getFixed (-Residual), MachineInstr::FrameSetup,
641+ getStackAlign ());
642+ if (EmitCFI) {
643+ // Emit ".cfi_def_cfa_offset Offset"
644+ unsigned CFIIndex =
645+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
646+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
647+ .addCFIIndex (CFIIndex)
648+ .setMIFlag (MachineInstr::FrameSetup);
649+ }
650+ }
651+
652+ return ;
653+ }
654+
655+ // Emit a variable-length allocation probing loop.
656+ uint64_t RoundedSize = alignDown (Offset, ProbeSize);
657+ uint64_t Residual = Offset - RoundedSize;
658+
659+ Register TargetReg = RISCV::X6;
660+ // SUB TargetReg, SP, RoundedSize
661+ RI->adjustReg (MBB, MBBI, DL, TargetReg, SPReg,
662+ StackOffset::getFixed (-RoundedSize), MachineInstr::FrameSetup,
593663 getStackAlign ());
594664
595665 if (EmitCFI) {
596- // Emit ".cfi_def_cfa_offset RealStackSize"
597- unsigned CFIIndex = MF.addFrameInst (
598- MCCFIInstruction::cfiDefCfaOffset (nullptr , RealStackSize));
666+ // Set the CFA register to TargetReg.
667+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (TargetReg, true );
668+ unsigned CFIIndex =
669+ MF.addFrameInst (MCCFIInstruction::cfiDefCfa (nullptr , Reg, RoundedSize));
599670 BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
600671 .addCFIIndex (CFIIndex)
601- .setMIFlag (MachineInstr::FrameSetup);
672+ .setMIFlags (MachineInstr::FrameSetup);
673+ }
674+
675+ // It will be expanded to a probe loop in `inlineStackProbe`.
676+ BuildMI (MBB, MBBI, DL, TII->get (RISCV::PROBED_STACKALLOC))
677+ .addReg (SPReg)
678+ .addReg (TargetReg);
679+
680+ if (EmitCFI) {
681+ // Set the CFA register back to SP.
682+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (SPReg, true );
683+ unsigned CFIIndex =
684+ MF.addFrameInst (MCCFIInstruction::createDefCfaRegister (nullptr , Reg));
685+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
686+ .addCFIIndex (CFIIndex)
687+ .setMIFlags (MachineInstr::FrameSetup);
688+ }
689+
690+ if (Residual)
691+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed (-Residual),
692+ MachineInstr::FrameSetup, getStackAlign ());
693+
694+ if (EmitCFI) {
695+ // Emit ".cfi_def_cfa_offset Offset"
696+ unsigned CFIIndex =
697+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
698+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
699+ .addCFIIndex (CFIIndex)
700+ .setMIFlags (MachineInstr::FrameSetup);
602701 }
603702}
604703
@@ -716,11 +815,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
716815 getPushOrLibCallsSavedInfo (MF, CSI));
717816 }
718817
719- if (StackSize != 0 ) {
720- // Allocate space on the stack if necessary.
721- allocateStack (MBB, MBBI, MF, StackOffset::getFixed (-StackSize),
722- RealStackSize, /* EmitCFI=*/ true );
723- }
818+ // Allocate space on the stack if necessary.
819+ auto &Subtarget = MF.getSubtarget <RISCVSubtarget>();
820+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering ();
821+ bool NeedProbe = TLI->hasInlineStackProbe (MF);
822+ uint64_t ProbeSize = TLI->getStackProbeSize (MF, getStackAlign ());
823+ if (StackSize != 0 )
824+ allocateStack (MBB, MBBI, MF, StackSize, RealStackSize, /* EmitCFI=*/ true ,
825+ NeedProbe, ProbeSize);
724826
725827 // The frame pointer is callee-saved, and code has been generated for us to
726828 // save it to the stack. We need to skip over the storing of callee-saved
@@ -761,8 +863,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
761863 assert (SecondSPAdjustAmount > 0 &&
762864 " SecondSPAdjustAmount should be greater than zero" );
763865
764- allocateStack (MBB, MBBI, MF, StackOffset::getFixed (-SecondSPAdjustAmount),
765- getStackSizeWithRVVPadding (MF), !hasFP (MF));
866+ allocateStack (MBB, MBBI, MF, SecondSPAdjustAmount,
867+ getStackSizeWithRVVPadding (MF), !hasFP (MF), NeedProbe,
868+ ProbeSize);
766869 }
767870
768871 if (RVVStackSize) {
@@ -1910,3 +2013,69 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
// Returns TargetStackID::ScalableVector as the stack ID for scalable vectors.
19102013TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors () const {
19112014 return TargetStackID::ScalableVector;
19122015}
2016+
2017+ // Synthesize the probe loop.
2018+ static void emitStackProbeInline (MachineFunction &MF, MachineBasicBlock &MBB,
2019+ MachineBasicBlock::iterator MBBI,
2020+ DebugLoc DL) {
2021+
2022+ auto &Subtarget = MF.getSubtarget <RISCVSubtarget>();
2023+ const RISCVInstrInfo *TII = Subtarget.getInstrInfo ();
2024+ bool IsRV64 = Subtarget.is64Bit ();
2025+ Align StackAlign = Subtarget.getFrameLowering ()->getStackAlign ();
2026+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering ();
2027+ uint64_t ProbeSize = TLI->getStackProbeSize (MF, StackAlign);
2028+
2029+ MachineFunction::iterator MBBInsertPoint = std::next (MBB.getIterator ());
2030+ MachineBasicBlock *LoopTestMBB =
2031+ MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2032+ MF.insert (MBBInsertPoint, LoopTestMBB);
2033+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2034+ MF.insert (MBBInsertPoint, ExitMBB);
2035+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2036+ Register TargetReg = RISCV::X6;
2037+ Register ScratchReg = RISCV::X7;
2038+
2039+ // ScratchReg = ProbeSize
2040+ TII->movImm (MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
2041+
2042+ // LoopTest:
2043+ // SUB SP, SP, ProbeSize
2044+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::SUB), SPReg)
2045+ .addReg (SPReg)
2046+ .addReg (ScratchReg)
2047+ .setMIFlags (Flags);
2048+
2049+ // s[d|w] zero, 0(sp)
2050+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL,
2051+ TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
2052+ .addReg (RISCV::X0)
2053+ .addReg (SPReg)
2054+ .addImm (0 )
2055+ .setMIFlags (Flags);
2056+
2057+ // BNE SP, TargetReg, LoopTest
2058+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::BNE))
2059+ .addReg (SPReg)
2060+ .addReg (TargetReg)
2061+ .addMBB (LoopTestMBB)
2062+ .setMIFlags (Flags);
2063+
2064+ ExitMBB->splice (ExitMBB->end (), &MBB, std::next (MBBI), MBB.end ());
2065+
2066+ LoopTestMBB->addSuccessor (ExitMBB);
2067+ LoopTestMBB->addSuccessor (LoopTestMBB);
2068+ MBB.addSuccessor (LoopTestMBB);
2069+ }
2070+
2071+ void RISCVFrameLowering::inlineStackProbe (MachineFunction &MF,
2072+ MachineBasicBlock &MBB) const {
2073+ auto Where = llvm::find_if (MBB, [](MachineInstr &MI) {
2074+ return MI.getOpcode () == RISCV::PROBED_STACKALLOC;
2075+ });
2076+ if (Where != MBB.end ()) {
2077+ DebugLoc DL = MBB.findDebugLoc (Where);
2078+ emitStackProbeInline (MF, MBB, Where, DL);
2079+ Where->eraseFromParent ();
2080+ }
2081+ }
0 commit comments