@@ -608,46 +608,107 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
608608 return ;
609609 }
610610
611- // Do an unrolled probe loop.
612- uint64_t CurrentOffset = 0 ;
613- bool IsRV64 = STI.is64Bit ();
614- while (CurrentOffset + ProbeSize <= Offset) {
615- RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
616- StackOffset::getFixed (-ProbeSize), MachineInstr::FrameSetup,
617- getStackAlign ());
618- // s[d|w] zero, 0(sp)
619- BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
620- .addReg (RISCV::X0)
621- .addReg (SPReg)
622- .addImm (0 )
623- .setMIFlags (MachineInstr::FrameSetup);
611+ // Unroll the probe loop depending on the number of iterations.
612+ if (Offset < ProbeSize * 5 ) {
613+ uint64_t CurrentOffset = 0 ;
614+ bool IsRV64 = STI.is64Bit ();
615+ while (CurrentOffset + ProbeSize <= Offset) {
616+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
617+ StackOffset::getFixed (-ProbeSize), MachineInstr::FrameSetup,
618+ getStackAlign ());
619+ // s[d|w] zero, 0(sp)
620+ BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
621+ .addReg (RISCV::X0)
622+ .addReg (SPReg)
623+ .addImm (0 )
624+ .setMIFlags (MachineInstr::FrameSetup);
625+
626+ CurrentOffset += ProbeSize;
627+ if (EmitCFI) {
628+ // Emit ".cfi_def_cfa_offset CurrentOffset"
629+ unsigned CFIIndex = MF.addFrameInst (
630+ MCCFIInstruction::cfiDefCfaOffset (nullptr , CurrentOffset));
631+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
632+ .addCFIIndex (CFIIndex)
633+ .setMIFlag (MachineInstr::FrameSetup);
634+ }
635+ }
624636
625- CurrentOffset += ProbeSize;
626- if (EmitCFI) {
627- // Emit ".cfi_def_cfa_offset CurrentOffset"
628- unsigned CFIIndex = MF.addFrameInst (
629- MCCFIInstruction::cfiDefCfaOffset (nullptr , CurrentOffset));
630- BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
631- .addCFIIndex (CFIIndex)
632- .setMIFlag (MachineInstr::FrameSetup);
637+ uint64_t Residual = Offset - CurrentOffset;
638+ if (Residual) {
639+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
640+ StackOffset::getFixed (-Residual), MachineInstr::FrameSetup,
641+ getStackAlign ());
642+ if (EmitCFI) {
643+ // Emit ".cfi_def_cfa_offset Offset"
644+ unsigned CFIIndex =
645+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
646+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
647+ .addCFIIndex (CFIIndex)
648+ .setMIFlag (MachineInstr::FrameSetup);
649+ }
633650 }
651+
652+ return ;
653+ }
654+
655+ // Emit a variable-length allocation probing loop.
656+ uint64_t RoundedSize = (Offset / ProbeSize) * ProbeSize;
657+ uint64_t Residual = Offset - RoundedSize;
658+
659+ Register TargetReg = RISCV::X6;
660+ // SUB TargetReg, SP, RoundedSize
661+ RI->adjustReg (MBB, MBBI, DL, TargetReg, SPReg,
662+ StackOffset::getFixed (-RoundedSize), MachineInstr::FrameSetup,
663+ getStackAlign ());
664+
665+ if (EmitCFI) {
666+ // Set the CFA register to TargetReg.
667+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (TargetReg, true );
668+ unsigned CFIIndex =
669+ MF.addFrameInst (MCCFIInstruction::cfiDefCfa (nullptr , Reg, RoundedSize));
670+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
671+ .addCFIIndex (CFIIndex)
672+ .setMIFlags (MachineInstr::FrameSetup);
673+ }
674+
675+ // It will be expanded to a probe loop in `inlineStackProbe`.
676+ BuildMI (MBB, MBBI, DL, TII->get (RISCV::PROBED_STACKALLOC))
677+ .addReg (SPReg)
678+ .addReg (TargetReg);
679+
680+ if (EmitCFI) {
681+ // Set the CFA register back to SP.
682+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (SPReg, true );
683+ unsigned CFIIndex =
684+ MF.addFrameInst (MCCFIInstruction::createDefCfaRegister (nullptr , Reg));
685+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
686+ .addCFIIndex (CFIIndex)
687+ .setMIFlags (MachineInstr::FrameSetup);
634688 }
635689
636- uint64_t Residual = Offset - CurrentOffset;
637690 if (Residual) {
638691 RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed (-Residual),
639692 MachineInstr::FrameSetup, getStackAlign ());
640- if (EmitCFI) {
641- // Emit ".cfi_def_cfa_offset Offset"
642- unsigned CFIIndex =
643- MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
644- BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
645- .addCFIIndex (CFIIndex)
646- .setMIFlag (MachineInstr::FrameSetup);
693+ if (Residual > ProbeSize) {
694+ // s[d|w] zero, 0(sp)
695+ bool IsRV64 = STI.is64Bit ();
696+ BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
697+ .addReg (RISCV::X0)
698+ .addReg (SPReg)
699+ .addImm (0 )
700+ .setMIFlags (MachineInstr::FrameSetup);
647701 }
648702 }
649703
650- return ;
704+ if (EmitCFI) {
705+ // Emit ".cfi_def_cfa_offset Offset"
706+ unsigned CFIIndex =
707+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
708+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
709+ .addCFIIndex (CFIIndex)
710+ .setMIFlags (MachineInstr::FrameSetup);
711+ }
651712}
652713
653714void RISCVFrameLowering::emitPrologue (MachineFunction &MF,
@@ -1962,3 +2023,72 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
19622023TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors () const {
19632024 return TargetStackID::ScalableVector;
19642025}
2026+
2027+ // Synthesize the probe loop.
2028+ static void emitStackProbeInline (MachineFunction &MF, MachineBasicBlock &MBB,
2029+ MachineBasicBlock::iterator MBBI,
2030+ DebugLoc DL) {
2031+
2032+ auto &Subtarget = MF.getSubtarget <RISCVSubtarget>();
2033+ const RISCVInstrInfo *TII = Subtarget.getInstrInfo ();
2034+ bool IsRV64 = Subtarget.is64Bit ();
2035+ Align StackAlign = Subtarget.getFrameLowering ()->getStackAlign ();
2036+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering ();
2037+ uint64_t ProbeSize = TLI->getStackProbeSize (MF, StackAlign);
2038+
2039+ MachineFunction::iterator MBBInsertPoint = std::next (MBB.getIterator ());
2040+ MachineBasicBlock *LoopTestMBB =
2041+ MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2042+ MF.insert (MBBInsertPoint, LoopTestMBB);
2043+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2044+ MF.insert (MBBInsertPoint, ExitMBB);
2045+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2046+ Register TargetReg = RISCV::X6;
2047+ Register ScratchReg = RISCV::X7;
2048+
2049+ // ScratchReg = ProbeSize
2050+ TII->movImm (MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
2051+
2052+ // LoopTest:
2053+ // SUB SP, SP, ProbeSize
2054+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::SUB), SPReg)
2055+ .addReg (SPReg)
2056+ .addReg (ScratchReg)
2057+ .setMIFlags (Flags);
2058+
2059+ // s[d|w] zero, 0(sp)
2060+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL,
2061+ TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
2062+ .addReg (RISCV::X0)
2063+ .addReg (SPReg)
2064+ .addImm (0 )
2065+ .setMIFlags (Flags);
2066+
2067+ // BNE SP, TargetReg, LoopTest
2068+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::BNE))
2069+ .addReg (SPReg)
2070+ .addReg (TargetReg)
2071+ .addMBB (LoopTestMBB)
2072+ .setMIFlags (Flags);
2073+
2074+ ExitMBB->splice (ExitMBB->end (), &MBB, std::next (MBBI), MBB.end ());
2075+
2076+ LoopTestMBB->addSuccessor (ExitMBB);
2077+ LoopTestMBB->addSuccessor (LoopTestMBB);
2078+ MBB.addSuccessor (LoopTestMBB);
2079+ }
2080+
2081+ void RISCVFrameLowering::inlineStackProbe (MachineFunction &MF,
2082+ MachineBasicBlock &MBB) const {
2083+ // Get the instructions that need to be replaced. We emit at most two of
2084+ // these. Remember them in order to avoid complications coming from the need
2085+ // to traverse the block while potentially creating more blocks.
2086+ auto Where = llvm::find_if (MBB, [](MachineInstr &MI) {
2087+ return MI.getOpcode () == RISCV::PROBED_STACKALLOC;
2088+ });
2089+ if (Where != MBB.end ()) {
2090+ DebugLoc DL = MBB.findDebugLoc (Where);
2091+ emitStackProbeInline (MF, MBB, Where, DL);
2092+ Where->eraseFromParent ();
2093+ }
2094+ }
0 commit comments