From 6b66dce392a96c9a40010ce47c872f50e1757dcd Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Mon, 25 Nov 2024 20:39:05 +1300 Subject: [PATCH] Enhance RISCV machine outlining to support a tailcall strategy. This is modeled on the equivalent path in the AArch64 backend. Whenever the outlining candidate ends in a terminator, we can use a tail call to reach it, removing the need to use a link register or to insert a return instruction in the outlined function. This improves code size in a size-optimized build of an internal benchmark by approximately 3%. --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 107 +++++++++++++----- .../CodeGen/RISCV/machine-outliner-cfi.mir | 20 ++-- .../machine-outliner-leaf-descendants.ll | 13 ++- .../RISCV/machine-outliner-patchable.ll | 16 +-- .../RISCV/machine-outliner-position.mir | 18 +-- .../RISCV/machine-outliner-tailcall.ll | 57 ++++++++++ llvm/test/CodeGen/RISCV/machineoutliner.mir | 39 ++----- 7 files changed, 175 insertions(+), 95 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 933e776da4740..33d1d47a89d65 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2925,9 +2925,42 @@ bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB, return TargetInstrInfo::isMBBSafeToOutlineFrom(MBB, Flags); } -// Enum values indicating how an outlined call should be constructed. +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerCallViaX5 implies that the function should be called +/// using X5 as an alternative link register. 
+/// +/// That is, +/// +/// I1 Materialize addr in X5 OUTLINED_FUNCTION: +/// I2 --> JAL X5 I1 +/// I3 I2 +/// I3 +/// RET X5 +/// +/// * Call construction overhead: 2 insns +/// * Frame construction overhead: 1 (ret) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// RET I2 +/// RET +/// +/// * Call construction overhead: 2 insns +/// * Frame construction overhead: 0 (Return included in sequence) +/// * Requires stack fixups? No +/// enum MachineOutlinerConstructionID { - MachineOutlinerDefault + MachineOutlinerCallViaX5, + MachineOutlinerTailCall }; bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault( @@ -2941,14 +2974,33 @@ RISCVInstrInfo::getOutliningCandidateInfo( std::vector &RepeatedSequenceLocs, unsigned MinRepeats) const { - // First we need to filter out candidates where the X5 register (IE t0) can't - // be used to setup the function call. - auto CannotInsertCall = [](outliner::Candidate &C) { - const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo(); - return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI); - }; + // If the last instruction in any candidate is a terminator, then we should + // tail call all of the candidates. + bool IsTailCall = RepeatedSequenceLocs[0].back().isTerminator(); + + if (!IsTailCall) { + // Filter out candidates where the X5 register (IE t0) can't + // be used to setup the function call. + auto CannotInsertCall = [](outliner::Candidate &C) { + const TargetRegisterInfo *TRI = + C.getMF()->getSubtarget().getRegisterInfo(); + if (!C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI)) + return true; - llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall); + // Don't allow modifying the X5 register which we use for return addresses + // for these outlined functions. 
+ for (const auto &MI : C) { + // FIXME: Why is this case not handled by isAvailableAcrossAndOutOfSeq + // above? + if (MI.modifiesRegister(RISCV::X5, TRI)) + return true; + } + + return false; + }; + + llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall); + } // If the sequence doesn't have enough candidates left, then we're done. if (RepeatedSequenceLocs.size() < MinRepeats) @@ -2961,8 +3013,12 @@ RISCVInstrInfo::getOutliningCandidateInfo( // call t0, function = 8 bytes. unsigned CallOverhead = 8; + + MachineOutlinerConstructionID OutlinerType = + IsTailCall ? MachineOutlinerTailCall : MachineOutlinerCallViaX5; + for (auto &C : RepeatedSequenceLocs) - C.setCallInfo(MachineOutlinerDefault, CallOverhead); + C.setCallInfo(OutlinerType, CallOverhead); // jr t0 = 4 bytes, 2 bytes if compressed instructions are enabled. unsigned FrameOverhead = 4; @@ -2972,9 +3028,12 @@ RISCVInstrInfo::getOutliningCandidateInfo( .hasStdExtCOrZca()) FrameOverhead = 2; + // There is no overhead in the frame when doing a tail call. + if (IsTailCall) + FrameOverhead = 0; + return std::make_unique( - RepeatedSequenceLocs, SequenceSize, FrameOverhead, - MachineOutlinerDefault); + RepeatedSequenceLocs, SequenceSize, FrameOverhead, OutlinerType); } outliner::InstrType @@ -2982,9 +3041,6 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MBBI, unsigned Flags) const { MachineInstr &MI = *MBBI; - MachineBasicBlock *MBB = MI.getParent(); - const TargetRegisterInfo *TRI = - MBB->getParent()->getSubtarget().getRegisterInfo(); const auto &F = MI.getMF()->getFunction(); // We can manually strip out CFI instructions later. @@ -2995,17 +3051,6 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI, return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal : outliner::InstrType::Invisible; - // We need support for tail calls to outlined functions before return - // statements can be allowed. 
- if (MI.isReturn()) - return outliner::InstrType::Illegal; - - // Don't allow modifying the X5 register which we use for return addresses for - // these outlined functions. - if (MI.modifiesRegister(RISCV::X5, TRI) || - MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5)) - return outliner::InstrType::Illegal; - // Make sure the operands don't reference something unsafe. for (const auto &MO : MI.operands()) { @@ -3041,6 +3086,9 @@ void RISCVInstrInfo::buildOutlinedFrame( MBB.addLiveIn(RISCV::X5); + if (OF.FrameConstructionID == MachineOutlinerTailCall) + return; + // Add in a return instruction to the end of the outlined frame. MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR)) .addReg(RISCV::X0, RegState::Define) @@ -3052,6 +3100,13 @@ MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall( Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const { + if (C.CallConstructionID == MachineOutlinerTailCall) { + It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL)) + .addGlobalAddress(M.getNamedValue(MF.getName()), 0, + RISCVII::MO_CALL)); + return It; + } + // Add in a call instruction to the outlined function at the given location. 
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5) diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir index 6ecca6a1b18ef..3aca71643ecd7 100644 --- a/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir +++ b/llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir @@ -22,13 +22,11 @@ body: | ; RV32I-MO-LABEL: name: func1 ; RV32I-MO: liveins: $x10, $x11 ; RV32I-MO-NEXT: {{ $}} - ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV32I-MO-NEXT: PseudoRET + ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 ; RV64I-MO-LABEL: name: func1 ; RV64I-MO: liveins: $x10, $x11 ; RV64I-MO-NEXT: {{ $}} - ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV64I-MO-NEXT: PseudoRET + ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 $x10 = ORI $x10, 1023 CFI_INSTRUCTION offset $x1, 0 $x11 = ORI $x11, 1023 @@ -49,13 +47,11 @@ body: | ; RV32I-MO-LABEL: name: func2 ; RV32I-MO: liveins: $x10, $x11 ; RV32I-MO-NEXT: {{ $}} - ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV32I-MO-NEXT: PseudoRET + ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 ; RV64I-MO-LABEL: name: func2 ; 
RV64I-MO: liveins: $x10, $x11 ; RV64I-MO-NEXT: {{ $}} - ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV64I-MO-NEXT: PseudoRET + ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 $x10 = ORI $x10, 1023 CFI_INSTRUCTION offset $x1, 0 $x11 = ORI $x11, 1023 @@ -76,13 +72,11 @@ body: | ; RV32I-MO-LABEL: name: func3 ; RV32I-MO: liveins: $x10, $x11 ; RV32I-MO-NEXT: {{ $}} - ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV32I-MO-NEXT: PseudoRET + ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 ; RV64I-MO-LABEL: name: func3 ; RV64I-MO: liveins: $x10, $x11 ; RV64I-MO-NEXT: {{ $}} - ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV64I-MO-NEXT: PseudoRET + ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 $x10 = ORI $x10, 1023 CFI_INSTRUCTION offset $x1, -12 $x11 = ORI $x11, 1023 @@ -103,4 +97,4 @@ body: | # OUTLINED-NEXT: $x12 = ADDI $x10, 17 # OUTLINED-NEXT: $x11 = AND $x12, $x11 # OUTLINED-NEXT: $x10 = SUB $x10, $x11 -# OUTLINED-NEXT: $x0 = JALR $x5, 0 +# OUTLINED-NEXT: PseudoRET diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll b/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll index 
8fab0aa9b6a76..981661466120f 100644 --- a/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll +++ b/llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll @@ -94,7 +94,8 @@ define i32 @_Z2f6v() minsize { ; CHECK-BASELINE-NEXT: li a3, 0x4 ; CHECK-BASELINE-NEXT: li a4, 0x5 ; CHECK-BASELINE-NEXT: li a5, 0x6 -; CHECK-BASELINE-NEXT: jr t0 +; CHECK-BASELINE-NEXT: auipc t1, 0x0 +; CHECK-BASELINE-NEXT: jr t1 ; CHECK-BASELINE: : ; CHECK-BASELINE-NEXT: li a0, 0x1 @@ -102,8 +103,9 @@ define i32 @_Z2f6v() minsize { ; CHECK-BASELINE-NEXT: li a2, 0x3 ; CHECK-BASELINE-NEXT: li a3, 0x4 ; CHECK-BASELINE-NEXT: li a4, 0x5 -; CHECK-BASELINE-NEXT: li a5, 0x7 -; CHECK-BASELINE-NEXT: jr t0 +; CHECK-BASELINE-NEXT: li a5, 0x8 +; CHECK-BASELINE-NEXT: auipc t1, 0x0 +; CHECK-BASELINE-NEXT: jr t1 ; CHECK-BASELINE: : ; CHECK-BASELINE-NEXT: li a0, 0x1 @@ -111,8 +113,9 @@ define i32 @_Z2f6v() minsize { ; CHECK-BASELINE-NEXT: li a2, 0x3 ; CHECK-BASELINE-NEXT: li a3, 0x4 ; CHECK-BASELINE-NEXT: li a4, 0x5 -; CHECK-BASELINE-NEXT: li a5, 0x8 -; CHECK-BASELINE-NEXT: jr t0 +; CHECK-BASELINE-NEXT: li a5, 0x7 +; CHECK-BASELINE-NEXT: auipc t1, 0x0 +; CHECK-BASELINE-NEXT: jr t1 ; CHECK-LEAF-DESCENDANTS: : ; CHECK-LEAF-DESCENDANTS-NEXT: li a0, 0x1 diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll index 4ef3abd241577..f6c293f3caf11 100644 --- a/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll +++ b/llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll @@ -10,8 +10,8 @@ define void @fentry0(i1 %a) nounwind "fentry-call"="true" { ; CHECK-LABEL: fentry0: ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: # FEntry call -; CHECK: # %bb.1: -; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1 +; CHECK: .LBB0_2: +; CHECK-NEXT: tail OUTLINED_FUNCTION_0 entry: br i1 %a, label %if.then, label %if.end if.then: @@ -26,8 +26,8 @@ define void @fentry1(i1 %a) nounwind "fentry-call"="true" { ; CHECK-LABEL: fentry1: ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: # 
FEntry call -; CHECK: # %bb.1: -; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1 +; CHECK: .LBB1_2: +; CHECK-NEXT: tail OUTLINED_FUNCTION_0 entry: br i1 %a, label %if.then, label %if.end if.then: @@ -46,8 +46,8 @@ define void @patchable0(i1 %a) nounwind "patchable-function-entry"="2" { ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK: # %bb.1: -; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1 +; CHECK: .LBB2_2: +; CHECK-NEXT: tail OUTLINED_FUNCTION_0 entry: br i1 %a, label %if.then, label %if.end if.then: @@ -64,8 +64,8 @@ define void @patchable1(i1 %a) nounwind "patchable-function-entry"="2" { ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: nop ; CHECK-NEXT: nop -; CHECK: # %bb.1: -; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1 +; CHECK: .LBB3_2: +; CHECK-NEXT: tail OUTLINED_FUNCTION_0 entry: br i1 %a, label %if.then, label %if.end if.then: diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir index 715e212eecabb..b384c5bca0a6f 100644 --- a/llvm/test/CodeGen/RISCV/machine-outliner-position.mir +++ b/llvm/test/CodeGen/RISCV/machine-outliner-position.mir @@ -25,15 +25,13 @@ body: | ; RV32I-MO-NEXT: {{ $}} ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023 ; RV32I-MO-NEXT: EH_LABEL - ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV32I-MO-NEXT: PseudoRET + ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 ; RV64I-MO-LABEL: name: func1 ; RV64I-MO: liveins: $x10, $x11 ; RV64I-MO-NEXT: {{ $}} ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023 ; RV64I-MO-NEXT: EH_LABEL - ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 
- ; RV64I-MO-NEXT: PseudoRET + ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 $x10 = ORI $x10, 1023 EH_LABEL $x11 = ORI $x11, 1023 @@ -53,15 +51,13 @@ body: | ; RV32I-MO-NEXT: {{ $}} ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023 ; RV32I-MO-NEXT: GC_LABEL - ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV32I-MO-NEXT: PseudoRET + ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 ; RV64I-MO-LABEL: name: func2 ; RV64I-MO: liveins: $x10, $x11 ; RV64I-MO-NEXT: {{ $}} ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023 ; RV64I-MO-NEXT: GC_LABEL - ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV64I-MO-NEXT: PseudoRET + ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 $x10 = ORI $x10, 1023 GC_LABEL $x11 = ORI $x11, 1023 @@ -81,15 +77,13 @@ body: | ; RV32I-MO-NEXT: {{ $}} ; RV32I-MO-NEXT: $x10 = ORI $x10, 1023 ; RV32I-MO-NEXT: ANNOTATION_LABEL - ; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV32I-MO-NEXT: PseudoRET + ; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 ; RV64I-MO-LABEL: name: func3 ; RV64I-MO: liveins: 
$x10, $x11 ; RV64I-MO-NEXT: {{ $}} ; RV64I-MO-NEXT: $x10 = ORI $x10, 1023 ; RV64I-MO-NEXT: ANNOTATION_LABEL - ; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11 - ; RV64I-MO-NEXT: PseudoRET + ; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11 $x10 = ORI $x10, 1023 ANNOTATION_LABEL $x11 = ORI $x11, 1023 diff --git a/llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll b/llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll new file mode 100644 index 0000000000000..1648a302b723e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/machine-outliner-tailcall.ll @@ -0,0 +1,57 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=riscv32 < %s | FileCheck %s +source_filename = "/app/example.cpp" +target datalayout = "e-m:e-p:32:32-i64:64-n32-S128" +target triple = "riscv32-unknown-linux-gnu" + +; Function Attrs: minsize mustprogress optsize uwtable +define dso_local noundef i32 @_Z3fooiiii(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #0 { + %5 = mul nsw i32 %1, %0 + %6 = mul nsw i32 %3, %2 + %7 = add nsw i32 %6, %5 + %8 = tail call noundef i32 @_Z3bari(i32 noundef %7) #2 + ret i32 %8 +} + +; Function Attrs: minsize optsize +declare noundef i32 @_Z3bari(i32 noundef) local_unnamed_addr #1 + +; Function Attrs: minsize mustprogress optsize uwtable +define dso_local noundef i32 @_Z3foziiii(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3) local_unnamed_addr #0 { + %5 = mul nsw i32 %1, %0 + %6 = mul nsw i32 %3, %2 + %7 = add nsw i32 %6, %5 + %8 = tail call noundef i32 @_Z3bari(i32 noundef %7) #2 + ret i32 %8 +} + +attributes 
#0 = { minsize mustprogress optsize uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv32" "target-features"="+32bit,+a,+c,+d,+f,+m,+relax,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,-b,-e,-experimental-smctr,-experimental-ssctr,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-v,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zabha,-zacas,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" } +attributes #1 = { minsize optsize "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv32" 
"target-features"="+32bit,+a,+c,+d,+f,+m,+relax,+zaamo,+zalrsc,+zicsr,+zifencei,+zmmul,-b,-e,-experimental-smctr,-experimental-ssctr,-experimental-zalasr,-experimental-zicfilp,-experimental-zicfiss,-experimental-zvbc32e,-experimental-zvkgs,-h,-sha,-shcounterenw,-shgatpa,-shtvala,-shvsatpa,-shvstvala,-shvstvecd,-smaia,-smcdeleg,-smcsrind,-smdbltrp,-smepmp,-smmpm,-smnpm,-smrnmi,-smstateen,-ssaia,-ssccfg,-ssccptr,-sscofpmf,-sscounterenw,-sscsrind,-ssdbltrp,-ssnpm,-sspm,-ssqosid,-ssstateen,-ssstrict,-sstc,-sstvala,-sstvecd,-ssu64xl,-supm,-svade,-svadu,-svbare,-svinval,-svnapot,-svpbmt,-svvptc,-v,-xcvalu,-xcvbi,-xcvbitmanip,-xcvelw,-xcvmac,-xcvmem,-xcvsimd,-xsfcease,-xsfvcp,-xsfvfnrclipxfqf,-xsfvfwmaccqqq,-xsfvqmaccdod,-xsfvqmaccqoq,-xsifivecdiscarddlone,-xsifivecflushdlone,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-xwchc,-za128rs,-za64rs,-zabha,-zacas,-zama16b,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zca,-zcb,-zcd,-zce,-zcf,-zcmop,-zcmp,-zcmt,-zdinx,-zfa,-zfbfmin,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zic64b,-zicbom,-zicbop,-zicboz,-ziccamoa,-ziccif,-zicclsm,-ziccrse,-zicntr,-zicond,-zihintntl,-zihintpause,-zihpm,-zimop,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-ztso,-zvbb,-zvbc,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvfbfmin,-zvfbfwma,-zvfh,-zvfhmin,-zvkb,-zvkg,-zvkn,-zvknc,-zvkned,-zvkng,-zvknha,-zvknhb,-zvks,-zvksc,-zvksed,-zvksg,-zvksh,-zvkt,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" } +attributes #2 = { minsize optsize } + +!llvm.module.flags = !{!0, !1, !2, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 1, !"target-abi", !"ilp32d"} +!2 = !{i32 6, !"riscv-isa", !3} +!3 = !{!"rv32i2p1_m2p0_a2p1_f2p2_d2p2_c2p0_zicsr2p0_zifencei2p0_zmmul1p0_zaamo1p0_zalrsc1p0"} +!4 = !{i32 8, !"PIC Level", i32 2} +!5 = !{i32 7, !"PIE Level", 
i32 2} +!6 = !{i32 7, !"uwtable", i32 2} +!7 = !{i32 8, !"SmallDataLimit", i32 0} +!8 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git a5af6214dd0e9d53c66dc06bcd23540b05c70120)"} +; CHECK-LABEL: _Z3fooiiii: +; CHECK: # %bb.0: +; CHECK-NEXT: tail OUTLINED_FUNCTION_0 +; +; CHECK-LABEL: _Z3foziiii: +; CHECK: # %bb.0: +; CHECK-NEXT: tail OUTLINED_FUNCTION_0 +; +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK: # %bb.0: +; CHECK-NEXT: mul a0, a1, a0 +; CHECK-NEXT: mul a1, a3, a2 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: tail _Z3bari diff --git a/llvm/test/CodeGen/RISCV/machineoutliner.mir b/llvm/test/CodeGen/RISCV/machineoutliner.mir index 0221257354fcf..678dbc48dc581 100644 --- a/llvm/test/CodeGen/RISCV/machineoutliner.mir +++ b/llvm/test/CodeGen/RISCV/machineoutliner.mir @@ -17,9 +17,6 @@ ; Should not outline functions with named linker sections define i32 @dont_outline_1(i32 %a, i32 %b) section "named" { ret i32 0 } - ; Cannot outline if the X5 (t0) register is not free - define i32 @dont_outline_2(i32 %a, i32 %b) { ret i32 0 } - ... 
--- name: outline_0 @@ -29,10 +26,10 @@ body: | bb.0: liveins: $x10, $x11 ; RV32I-MO-LABEL: name: outline_0 - ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 + ; RV32I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0 ; ; RV64I-MO-LABEL: name: outline_0 - ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 + ; RV64I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0 $x11 = ORI $x11, 1023 $x12 = ADDI $x10, 17 $x11 = AND $x12, $x11 @@ -48,10 +45,10 @@ body: | bb.0: liveins: $x10, $x11 ; RV32I-MO-LABEL: name: outline_1 - ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 + ; RV32I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0 ; ; RV64I-MO-LABEL: name: outline_1 - ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 + ; RV64I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0 $x11 = ORI $x11, 1023 $x12 = ADDI $x10, 17 $x11 = AND $x12, $x11 @@ -67,10 +64,10 @@ body: | bb.0: liveins: $x10, $x11 ; RV32I-MO-LABEL: name: outline_2 - ; RV32I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 + ; RV32I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0 ; ; RV64I-MO-LABEL: name: outline_2 - ; RV64I-MO: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 + ; RV64I-MO: PseudoTAIL {{.*}} @OUTLINED_FUNCTION_0 $x11 = ORI $x11, 1023 $x12 = ADDI $x10, 17 $x11 = AND $x12, $x11 @@ -86,10 +83,10 @@ body: | bb.0: liveins: $x10, $x11 ; RV32I-MO-LABEL: name: dont_outline_0 - ; RV32I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 + ; RV32I-MO-NOT: @OUTLINED_FUNCTION_0 ; ; RV64I-MO-LABEL: name: dont_outline_0 - ; RV64I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 + ; RV64I-MO-NOT: @OUTLINED_FUNCTION_0 $x11 = ORI $x11, 1023 $x12 = ADDI $x10, 17 $x11 = AND $x12, $x11 @@ -115,26 +112,6 @@ body: | $x10 = SUB $x10, $x11 PseudoRET implicit $x10 -... 
---- -name: dont_outline_2 -tracksRegLiveness: true -isOutlined: false -body: | - bb.0: - liveins: $x10, $x11, $x5 - ; RV32I-MO-LABEL: name: dont_outline_2 - ; RV32I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 - ; - ; RV64I-MO-LABEL: name: dont_outline_2 - ; RV64I-MO-NOT: $x5 = PseudoCALLReg {{.*}} @OUTLINED_FUNCTION_0 - $x11 = ORI $x11, 1023 - $x12 = ADDI $x10, 17 - $x11 = AND $x12, $x11 - $x10 = SUB $x10, $x11 - $x10 = ADD $x10, $x5 - PseudoRET implicit $x10 - ... # CHECK-LABEL: name: OUTLINED_FUNCTION_0