-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[RISCV] Implement tail call optimization in machine outliner #115297
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2929,6 +2929,7 @@ bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, | |
|
|
||
| // Enum values indicating how an outlined call should be constructed. | ||
| enum MachineOutlinerConstructionID { | ||
| MachineOutlinerTailCall, | ||
| MachineOutlinerDefault | ||
| }; | ||
|
|
||
|
|
@@ -2937,19 +2938,47 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault( | |
| return MF.getFunction().hasMinSize(); | ||
| } | ||
|
|
||
| static bool isCandidatePatchable(const MachineInstr &MI) { | ||
| const MachineBasicBlock *MBB = MI.getParent(); | ||
| const MachineFunction *MF = MBB->getParent(); | ||
| const Function &F = MF->getFunction(); | ||
| return F.getFnAttribute("fentry-call").getValueAsBool() || | ||
| F.hasFnAttribute("patchable-function-entry"); | ||
| } | ||
|
|
||
| static bool cannotInsertTailCall(const MachineInstr &MI) { | ||
| if (MI.isTerminator()) | ||
| return isCandidatePatchable(MI); | ||
| return true; | ||
| } | ||
|
|
||
| static bool isMIUsesX5(const MachineInstr &MI, const TargetRegisterInfo *TRI) { | ||
| return MI.modifiesRegister(RISCV::X5, TRI) || | ||
| MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5); | ||
| } | ||
|
|
||
| std::optional<std::unique_ptr<outliner::OutlinedFunction>> | ||
| RISCVInstrInfo::getOutliningCandidateInfo( | ||
| const MachineModuleInfo &MMI, | ||
| std::vector<outliner::Candidate> &RepeatedSequenceLocs, | ||
| unsigned MinRepeats) const { | ||
|
|
||
| // First we need to filter out candidates where the X5 register (i.e. t0) can't | ||
| // be used to set up the function call. | ||
| auto CannotInsertCall = [](outliner::Candidate &C) { | ||
| auto CandidateUsesX5 = [](outliner::Candidate &C) { | ||
| const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo(); | ||
| for (const MachineInstr &MI : C) | ||
| if (isMIUsesX5(MI, TRI)) | ||
| return true; | ||
| return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI); | ||
| }; | ||
|
|
||
| auto CannotInsertCall = [CandidateUsesX5](outliner::Candidate &C) { | ||
| if (!CandidateUsesX5(C)) | ||
| return false; | ||
| if (!cannotInsertTailCall(C.back())) | ||
| return false; | ||
| return true; | ||
| }; | ||
|
|
||
| llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall); | ||
|
|
||
| // If the sequence doesn't have enough candidates left, then we're done. | ||
|
|
@@ -2961,6 +2990,17 @@ RISCVInstrInfo::getOutliningCandidateInfo( | |
| for (auto &MI : RepeatedSequenceLocs[0]) | ||
| SequenceSize += getInstSizeInBytes(MI); | ||
|
|
||
| if (!cannotInsertTailCall(RepeatedSequenceLocs[0].back())) { | ||
| // tail function = 8 bytes. Can't be compressed | ||
| for (auto &C : RepeatedSequenceLocs) | ||
| C.setCallInfo(MachineOutlinerTailCall, 8); | ||
|
|
||
| // Using tail call we move ret instruction from caller to callee. | ||
|
||
| // So, FrameOverhead for this is 0 | ||
| return std::make_unique<outliner::OutlinedFunction>( | ||
| RepeatedSequenceLocs, SequenceSize, 0, MachineOutlinerTailCall); | ||
| } | ||
|
|
||
| // call t0, function = 8 bytes. | ||
| unsigned CallOverhead = 8; | ||
| for (auto &C : RepeatedSequenceLocs) | ||
|
|
@@ -2997,15 +3037,7 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI, | |
| return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal | ||
| : outliner::InstrType::Invisible; | ||
|
|
||
| // We need support for tail calls to outlined functions before return | ||
| // statements can be allowed. | ||
| if (MI.isReturn()) | ||
| return outliner::InstrType::Illegal; | ||
|
|
||
| // Don't allow modifying the X5 register which we use for return addresses for | ||
| // these outlined functions. | ||
| if (MI.modifiesRegister(RISCV::X5, TRI) || | ||
| MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5)) | ||
| if (cannotInsertTailCall(MBB->back()) && isMIUsesX5(MI, TRI)) | ||
| return outliner::InstrType::Illegal; | ||
|
|
||
| // Make sure the operands don't reference something unsafe. | ||
|
|
@@ -3041,6 +3073,9 @@ void RISCVInstrInfo::buildOutlinedFrame( | |
| } | ||
| } | ||
|
|
||
| if (OF.FrameConstructionID == MachineOutlinerTailCall) | ||
| return; | ||
|
|
||
| MBB.addLiveIn(RISCV::X5); | ||
|
|
||
| // Add in a return instruction to the end of the outlined frame. | ||
|
|
@@ -3054,6 +3089,13 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( | |
| Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, | ||
| MachineFunction &MF, outliner::Candidate &C) const { | ||
|
|
||
| if (C.CallConstructionID == MachineOutlinerTailCall) { | ||
| It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL)) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. PseudoTAIL expands to AUIPC+JALR using X6 or X7 as a temporary. If the linker doesn't relax the AUIPC+JALR sequence to JAL, then X6 or X7 will be overwritten. How do we know it is OK to overwrite X6 or X7?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the comment. I did not consider the effect on registers x6 and x7 before relaxation. Apparently, even llvm-test-suite doesn't cover such a case. |
||
| .addGlobalAddress(M.getNamedValue(MF.getName()), | ||
| /*Offset=*/0, RISCVII::MO_CALL)); | ||
| return It; | ||
| } | ||
|
|
||
| // Add in a call instruction to the outlined function at the given location. | ||
| It = MBB.insert(It, | ||
| BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,70 @@ | ||
| ; RUN: llc < %s -verify-machineinstrs -enable-machine-outliner | FileCheck %s | ||
|
|
||
| target triple = "riscv64-unknown-linux-gnu" | ||
|
|
||
| declare void @foo(i32, i32, i32, i32) minsize | ||
|
|
||
| define void @fentry0(i1 %a) nounwind { | ||
| ; CHECK-LABEL: fentry0: | ||
| ; CHECK: # %bb.1: | ||
| ; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]] | ||
| ; CHECK-NEXT: call foo | ||
| ; CHECK-LABEL: .LBB0_2: | ||
| ; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]] | ||
| entry: | ||
| br i1 %a, label %if.then, label %if.end | ||
| if.then: | ||
| call void @foo(i32 1, i32 2, i32 3, i32 4) | ||
| br label %if.end | ||
| if.end: | ||
| call void @foo(i32 5, i32 6, i32 7, i32 8) | ||
| ret void | ||
| } | ||
|
|
||
| define void @fentry1(i1 %a) nounwind { | ||
| ; CHECK-LABEL: fentry1: | ||
| ; CHECK: # %bb.1: | ||
| ; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]] | ||
| ; CHECK-NEXT: call foo | ||
| ; CHECK-LABEL: .LBB1_2: | ||
| ; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]] | ||
| entry: | ||
| br i1 %a, label %if.then, label %if.end | ||
| if.then: | ||
| call void @foo(i32 1, i32 2, i32 3, i32 4) | ||
| br label %if.end | ||
| if.end: | ||
| call void @foo(i32 5, i32 6, i32 7, i32 8) | ||
| ret void | ||
| } | ||
|
|
||
| define void @fentry2(i1 %a) nounwind { | ||
| ; CHECK-LABEL: fentry2: | ||
| ; CHECK: # %bb.1: | ||
| ; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]] | ||
| ; CHECK-NEXT: call foo | ||
| ; CHECK-LABEL: .LBB2_2: | ||
| ; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]] | ||
| entry: | ||
| br i1 %a, label %if.then, label %if.end | ||
| if.then: | ||
| call void @foo(i32 1, i32 2, i32 3, i32 4) | ||
| br label %if.end | ||
| if.end: | ||
| call void @foo(i32 5, i32 6, i32 7, i32 8) | ||
| ret void | ||
| } | ||
|
|
||
| ; CHECK: OUTLINED_FUNCTION_[[BB2]]: | ||
| ; CHECK: li a0, 5 | ||
| ; CHECK-NEXT: li a1, 6 | ||
| ; CHECK-NEXT: li a2, 7 | ||
| ; CHECK-NEXT: li a3, 8 | ||
| ; CHECK-NEXT: call foo | ||
|
|
||
| ; CHECK: OUTLINED_FUNCTION_[[BB1]]: | ||
| ; CHECK: li a0, 1 | ||
| ; CHECK-NEXT: li a1, 2 | ||
| ; CHECK-NEXT: li a2, 3 | ||
| ; CHECK-NEXT: li a3, 4 | ||
| ; CHECK-NEXT: jr t0 |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why can't it be compressed? Or do you just mean it can't be compressed in the assembler? It should be compressible in the linker.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Rewrote it in a more precise form