@@ -638,46 +638,107 @@ void RISCVFrameLowering::allocateStack(MachineBasicBlock &MBB,
638638 return ;
639639 }
640640
641- // Do an unrolled probe loop.
642- uint64_t CurrentOffset = 0 ;
643- bool IsRV64 = STI.is64Bit ();
644- while (CurrentOffset + ProbeSize <= Offset) {
645- RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
646- StackOffset::getFixed (-ProbeSize), MachineInstr::FrameSetup,
647- getStackAlign ());
648- // s[d|w] zero, 0(sp)
649- BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
650- .addReg (RISCV::X0)
651- .addReg (SPReg)
652- .addImm (0 )
653- .setMIFlags (MachineInstr::FrameSetup);
641+ // Unroll the probe loop depending on the number of iterations.
642+ if (Offset < ProbeSize * 5 ) {
643+ uint64_t CurrentOffset = 0 ;
644+ bool IsRV64 = STI.is64Bit ();
645+ while (CurrentOffset + ProbeSize <= Offset) {
646+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
647+ StackOffset::getFixed (-ProbeSize), MachineInstr::FrameSetup,
648+ getStackAlign ());
649+ // s[d|w] zero, 0(sp)
650+ BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
651+ .addReg (RISCV::X0)
652+ .addReg (SPReg)
653+ .addImm (0 )
654+ .setMIFlags (MachineInstr::FrameSetup);
655+
656+ CurrentOffset += ProbeSize;
657+ if (EmitCFI) {
658+ // Emit ".cfi_def_cfa_offset CurrentOffset"
659+ unsigned CFIIndex = MF.addFrameInst (
660+ MCCFIInstruction::cfiDefCfaOffset (nullptr , CurrentOffset));
661+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
662+ .addCFIIndex (CFIIndex)
663+ .setMIFlag (MachineInstr::FrameSetup);
664+ }
665+ }
654666
655- CurrentOffset += ProbeSize;
656- if (EmitCFI) {
657- // Emit ".cfi_def_cfa_offset CurrentOffset"
658- unsigned CFIIndex = MF.addFrameInst (
659- MCCFIInstruction::cfiDefCfaOffset (nullptr , CurrentOffset));
660- BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
661- .addCFIIndex (CFIIndex)
662- .setMIFlag (MachineInstr::FrameSetup);
667+ uint64_t Residual = Offset - CurrentOffset;
668+ if (Residual) {
669+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
670+ StackOffset::getFixed (-Residual), MachineInstr::FrameSetup,
671+ getStackAlign ());
672+ if (EmitCFI) {
673+ // Emit ".cfi_def_cfa_offset Offset"
674+ unsigned CFIIndex =
675+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
676+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
677+ .addCFIIndex (CFIIndex)
678+ .setMIFlag (MachineInstr::FrameSetup);
679+ }
663680 }
681+
682+ return ;
683+ }
684+
685+ // Emit a variable-length allocation probing loop.
686+ uint64_t RoundedSize = (Offset / ProbeSize) * ProbeSize;
687+ uint64_t Residual = Offset - RoundedSize;
688+
689+ Register TargetReg = RISCV::X6;
690+ // SUB TargetReg, SP, RoundedSize
691+ RI->adjustReg (MBB, MBBI, DL, TargetReg, SPReg,
692+ StackOffset::getFixed (-RoundedSize), MachineInstr::FrameSetup,
693+ getStackAlign ());
694+
695+ if (EmitCFI) {
696+ // Set the CFA register to TargetReg.
697+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (TargetReg, true );
698+ unsigned CFIIndex =
699+ MF.addFrameInst (MCCFIInstruction::cfiDefCfa (nullptr , Reg, RoundedSize));
700+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
701+ .addCFIIndex (CFIIndex)
702+ .setMIFlags (MachineInstr::FrameSetup);
703+ }
704+
705+ // It will be expanded to a probe loop in `inlineStackProbe`.
706+ BuildMI (MBB, MBBI, DL, TII->get (RISCV::PROBED_STACKALLOC))
707+ .addReg (SPReg)
708+ .addReg (TargetReg);
709+
710+ if (EmitCFI) {
711+ // Set the CFA register back to SP.
712+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (SPReg, true );
713+ unsigned CFIIndex =
714+ MF.addFrameInst (MCCFIInstruction::createDefCfaRegister (nullptr , Reg));
715+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
716+ .addCFIIndex (CFIIndex)
717+ .setMIFlags (MachineInstr::FrameSetup);
664718 }
665719
666- uint64_t Residual = Offset - CurrentOffset;
667720 if (Residual) {
668721 RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed (-Residual),
669722 MachineInstr::FrameSetup, getStackAlign ());
670- if (EmitCFI) {
671- // Emit ".cfi_def_cfa_offset Offset"
672- unsigned CFIIndex =
673- MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
674- BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
675- .addCFIIndex (CFIIndex)
676- .setMIFlag (MachineInstr::FrameSetup);
723+ if (Residual > ProbeSize) {
724+ // s[d|w] zero, 0(sp)
725+ bool IsRV64 = STI.is64Bit ();
726+ BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
727+ .addReg (RISCV::X0)
728+ .addReg (SPReg)
729+ .addImm (0 )
730+ .setMIFlags (MachineInstr::FrameSetup);
677731 }
678732 }
679733
680- return ;
734+ if (EmitCFI) {
735+ // Emit ".cfi_def_cfa_offset Offset"
736+ unsigned CFIIndex =
737+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
738+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
739+ .addCFIIndex (CFIIndex)
740+ .setMIFlags (MachineInstr::FrameSetup);
741+ }
681742}
682743
683744void RISCVFrameLowering::emitPrologue (MachineFunction &MF,
@@ -1988,3 +2049,72 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
// Reports the stack ID used for scalable-vector stack objects on this target:
// always TargetStackID::ScalableVector.
19882049TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors () const {
19892050 return TargetStackID::ScalableVector;
19902051}
2052+
2053+ // Synthesize the probe loop.
2054+ static void emitStackProbeInline (MachineFunction &MF, MachineBasicBlock &MBB,
2055+ MachineBasicBlock::iterator MBBI,
2056+ DebugLoc DL) {
2057+
2058+ auto &Subtarget = MF.getSubtarget <RISCVSubtarget>();
2059+ const RISCVInstrInfo *TII = Subtarget.getInstrInfo ();
2060+ bool IsRV64 = Subtarget.is64Bit ();
2061+ Align StackAlign = Subtarget.getFrameLowering ()->getStackAlign ();
2062+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering ();
2063+ uint64_t ProbeSize = TLI->getStackProbeSize (MF, StackAlign);
2064+
2065+ MachineFunction::iterator MBBInsertPoint = std::next (MBB.getIterator ());
2066+ MachineBasicBlock *LoopTestMBB =
2067+ MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2068+ MF.insert (MBBInsertPoint, LoopTestMBB);
2069+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2070+ MF.insert (MBBInsertPoint, ExitMBB);
2071+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2072+ Register TargetReg = RISCV::X6;
2073+ Register ScratchReg = RISCV::X7;
2074+
2075+ // ScratchReg = ProbeSize
2076+ TII->movImm (MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
2077+
2078+ // LoopTest:
2079+ // SUB SP, SP, ProbeSize
2080+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::SUB), SPReg)
2081+ .addReg (SPReg)
2082+ .addReg (ScratchReg)
2083+ .setMIFlags (Flags);
2084+
2085+ // s[d|w] zero, 0(sp)
2086+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL,
2087+ TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
2088+ .addReg (RISCV::X0)
2089+ .addReg (SPReg)
2090+ .addImm (0 )
2091+ .setMIFlags (Flags);
2092+
2093+ // BNE SP, TargetReg, LoopTest
2094+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::BNE))
2095+ .addReg (SPReg)
2096+ .addReg (TargetReg)
2097+ .addMBB (LoopTestMBB)
2098+ .setMIFlags (Flags);
2099+
2100+ ExitMBB->splice (ExitMBB->end (), &MBB, std::next (MBBI), MBB.end ());
2101+
2102+ LoopTestMBB->addSuccessor (ExitMBB);
2103+ LoopTestMBB->addSuccessor (LoopTestMBB);
2104+ MBB.addSuccessor (LoopTestMBB);
2105+ }
2106+
2107+ void RISCVFrameLowering::inlineStackProbe (MachineFunction &MF,
2108+ MachineBasicBlock &MBB) const {
2109+ // Get the instructions that need to be replaced. We emit at most two of
2110+ // these. Remember them in order to avoid complications coming from the need
2111+ // to traverse the block while potentially creating more blocks.
2112+ auto Where = llvm::find_if (MBB, [](MachineInstr &MI) {
2113+ return MI.getOpcode () == RISCV::PROBED_STACKALLOC;
2114+ });
2115+ if (Where != MBB.end ()) {
2116+ DebugLoc DL = MBB.findDebugLoc (Where);
2117+ emitStackProbeInline (MF, MBB, Where, DL);
2118+ Where->eraseFromParent ();
2119+ }
2120+ }
0 commit comments