Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 57 additions & 15 deletions llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2929,6 +2929,7 @@ bool RISCVInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,

// Enum values indicating how an outlined call should be constructed.
// The same values are used both as a candidate's call-construction ID and as
// an outlined function's frame-construction ID in this file.
enum MachineOutlinerConstructionID {
// The call site is emitted as a PseudoTAIL to the outlined function, and the
// outlined frame gets no extra return instruction appended.
MachineOutlinerTailCall,
// The call site is emitted as a PseudoCALLReg that places the return address
// in X5, and the outlined frame ends with a JALR through X5.
MachineOutlinerDefault
};

Expand All @@ -2937,19 +2938,47 @@ bool RISCVInstrInfo::shouldOutlineFromFunctionByDefault(
return MF.getFunction().hasMinSize();
}

static bool IsCandidatePatchable(const MachineInstr &MI) {
const MachineBasicBlock *MBB = MI.getParent();
const MachineFunction *MF = MBB->getParent();
const Function &F = MF->getFunction();
return F.getFnAttribute("fentry-call").getValueAsBool() ||
F.hasFnAttribute("patchable-function-entry");
}

/// Returns true when a tail call must not be used in place of \p MI.
/// A tail call is only permitted when \p MI is a terminator and its parent
/// function is not patchable (see IsCandidatePatchable); every other case
/// is rejected. Callers in this file pass the last instruction of a
/// candidate or of a basic block.
static bool CannotInsertTailCall(const MachineInstr &MI) {
  return !MI.isTerminator() || IsCandidatePatchable(MI);
}

/// Returns true when \p MI writes X5 (t0), either according to
/// modifiesRegister() or because its instruction description lists X5 as an
/// implicit def. Note that, despite the name, only definitions of X5 are
/// detected here, not reads.
static bool MIUseX5(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
  if (MI.modifiesRegister(RISCV::X5, TRI))
    return true;
  return MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5);
}

std::optional<std::unique_ptr<outliner::OutlinedFunction>>
RISCVInstrInfo::getOutliningCandidateInfo(
const MachineModuleInfo &MMI,
std::vector<outliner::Candidate> &RepeatedSequenceLocs,
unsigned MinRepeats) const {

// First we need to filter out candidates where the X5 register (i.e. t0)
// can't be used to set up the function call.
auto CannotInsertCall = [](outliner::Candidate &C) {
auto CandidateUseX5 = [](outliner::Candidate &C) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use -> Uses

const TargetRegisterInfo *TRI = C.getMF()->getSubtarget().getRegisterInfo();
for (const MachineInstr &MI : C)
if (MIUseX5(MI, TRI))
return true;
return !C.isAvailableAcrossAndOutOfSeq(RISCV::X5, *TRI);
};

auto CannotInsertCall = [CandidateUseX5](outliner::Candidate &C) {
if (!CandidateUseX5(C))
return false;
if (!CannotInsertTailCall(C.back()))
return false;
return true;
};

llvm::erase_if(RepeatedSequenceLocs, CannotInsertCall);

// If the sequence doesn't have enough candidates left, then we're done.
Expand All @@ -2961,6 +2990,17 @@ RISCVInstrInfo::getOutliningCandidateInfo(
for (auto &MI : RepeatedSequenceLocs[0])
SequenceSize += getInstSizeInBytes(MI);

if (!CannotInsertTailCall(RepeatedSequenceLocs[0].back())) {
// tail function = 8 bytes. Can't be compressed
Copy link
Collaborator

@topperc topperc Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't it be compressed? Or do you just mean it can't be compressed in the assembler? It should be compressible in the linker.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Rewrote it in a more precise form

    // tail call = auipc + jalr in the worst case without linker relaxation.
    CallOverhead = 4 + InstrSizeCExt;

for (auto &C : RepeatedSequenceLocs)
C.setCallInfo(MachineOutlinerTailCall, 8);

// Using a tail call, we move the ret instruction from the caller to the callee.
// So, FrameOverhead for this is 0.
return std::make_unique<outliner::OutlinedFunction>(
RepeatedSequenceLocs, SequenceSize, 0, MachineOutlinerTailCall);
}

// call t0, function = 8 bytes.
unsigned CallOverhead = 8;
for (auto &C : RepeatedSequenceLocs)
Expand Down Expand Up @@ -2997,15 +3037,7 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
return F.needsUnwindTableEntry() ? outliner::InstrType::Illegal
: outliner::InstrType::Invisible;

// We need support for tail calls to outlined functions before return
// statements can be allowed.
if (MI.isReturn())
return outliner::InstrType::Illegal;

// Don't allow modifying the X5 register which we use for return addresses for
// these outlined functions.
if (MI.modifiesRegister(RISCV::X5, TRI) ||
MI.getDesc().hasImplicitDefOfPhysReg(RISCV::X5))
if (CannotInsertTailCall(MBB->back()) && MIUseX5(MI, TRI))
return outliner::InstrType::Illegal;

// Make sure the operands don't reference something unsafe.
Expand Down Expand Up @@ -3041,19 +3073,29 @@ void RISCVInstrInfo::buildOutlinedFrame(
}
}

if (OF.FrameConstructionID == MachineOutlinerTailCall)
return;

MBB.addLiveIn(RISCV::X5);

// Add in a return instruction to the end of the outlined frame.
MBB.insert(MBB.end(), BuildMI(MF, DebugLoc(), get(RISCV::JALR))
.addReg(RISCV::X0, RegState::Define)
.addReg(RISCV::X5)
.addImm(0));
.addReg(RISCV::X0, RegState::Define)
.addReg(RISCV::X5)
.addImm(0));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this intentional?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I'll revert these changes.
They were introduced by git clang-format ...

}

MachineBasicBlock::iterator RISCVInstrInfo::insertOutlinedCall(
Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
MachineFunction &MF, outliner::Candidate &C) const {

if (C.CallConstructionID == MachineOutlinerTailCall) {
It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(RISCV::PseudoTAIL))
Copy link
Collaborator

@topperc topperc Nov 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

PseudoTAIL expands to AUIPC+JALR using X6 or X7 as a temporary. If the linker doesn't relax the AUIPC+JALR sequence to JAL, then X6 or X7 will be overwritten. How do we know it is ok to overwrite X6 or X7?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the comment. I had not considered the effect on registers x6 and x7 before relaxation. Apparently, even llvm-test-suite doesn't exercise such a case.
I rewrote the verification algorithm for tail calls. If X6/X7 (the specific register is determined from the enabled extensions) is modified before it is read, then my optimization can be applied.

.addGlobalAddress(M.getNamedValue(MF.getName()),
/*Offset=*/0, RISCVII::MO_CALL));
return It;
}

// Add in a call instruction to the outlined function at the given location.
It = MBB.insert(It,
BuildMI(MF, DebugLoc(), get(RISCV::PseudoCALLReg), RISCV::X5)
Expand Down
70 changes: 70 additions & 0 deletions llvm/test/CodeGen/RISCV/machine-outliner-call.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
; RUN: llc < %s -verify-machineinstrs -enable-machine-outliner | FileCheck %s

target triple = "riscv64-unknown-linux-gnu"

declare void @foo(i32, i32, i32, i32) minsize

; Conditionally calls @foo(1, 2, 3, 4) in if.then, then always calls
; @foo(5, 6, 7, 8) in if.end before returning. Despite its name, this
; function only carries the nounwind attribute here. The FileCheck
; expectations below require the if.then block to start with a
; `call t0` to one outlined function and the if.end block to start with a
; `tail` to another.
define void @fentry0(i1 %a) nounwind {
; CHECK-LABEL: fentry0:
; CHECK: # %bb.1:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
; CHECK-NEXT: call foo
; CHECK-LABEL: .LBB0_2:
; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
entry:
br i1 %a, label %if.then, label %if.end
if.then:
call void @foo(i32 1, i32 2, i32 3, i32 4)
br label %if.end
if.end:
call void @foo(i32 5, i32 6, i32 7, i32 8)
ret void
}

; Conditionally calls @foo(1, 2, 3, 4) in if.then, then always calls
; @foo(5, 6, 7, 8) in if.end before returning. Despite its name, this
; function only carries the nounwind attribute here. The FileCheck
; expectations below require the if.then block to start with a
; `call t0` to one outlined function and the if.end block to start with a
; `tail` to another.
define void @fentry1(i1 %a) nounwind {
; CHECK-LABEL: fentry1:
; CHECK: # %bb.1:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
; CHECK-NEXT: call foo
; CHECK-LABEL: .LBB1_2:
; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
entry:
br i1 %a, label %if.then, label %if.end
if.then:
call void @foo(i32 1, i32 2, i32 3, i32 4)
br label %if.end
if.end:
call void @foo(i32 5, i32 6, i32 7, i32 8)
ret void
}

; Conditionally calls @foo(1, 2, 3, 4) in if.then, then always calls
; @foo(5, 6, 7, 8) in if.end before returning. Despite its name, this
; function only carries the nounwind attribute here. The FileCheck
; expectations below require the if.then block to start with a
; `call t0` to one outlined function and the if.end block to start with a
; `tail` to another.
define void @fentry2(i1 %a) nounwind {
; CHECK-LABEL: fentry2:
; CHECK: # %bb.1:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
; CHECK-NEXT: call foo
; CHECK-LABEL: .LBB2_2:
; CHECK-NEXT: tail OUTLINED_FUNCTION_[[BB2:[0-9]+]]
entry:
br i1 %a, label %if.then, label %if.end
if.then:
call void @foo(i32 1, i32 2, i32 3, i32 4)
br label %if.end
if.end:
call void @foo(i32 5, i32 6, i32 7, i32 8)
ret void
}

; CHECK: OUTLINED_FUNCTION_[[BB2]]:
; CHECK: li a0, 5
; CHECK-NEXT: li a1, 6
; CHECK-NEXT: li a2, 7
; CHECK-NEXT: li a3, 8
; CHECK-NEXT: call foo

; CHECK: OUTLINED_FUNCTION_[[BB1]]:
; CHECK: li a0, 1
; CHECK-NEXT: li a1, 2
; CHECK-NEXT: li a2, 3
; CHECK-NEXT: li a3, 4
; CHECK-NEXT: jr t0
22 changes: 8 additions & 14 deletions llvm/test/CodeGen/RISCV/machine-outliner-cfi.mir
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,11 @@ body: |
; RV32I-MO-LABEL: name: func1
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
; RV32I-MO-NEXT: PseudoRET
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func1
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
; RV64I-MO-NEXT: PseudoRET
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, 0
$x11 = ORI $x11, 1023
Expand All @@ -49,13 +47,11 @@ body: |
; RV32I-MO-LABEL: name: func2
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
; RV32I-MO-NEXT: PseudoRET
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func2
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
; RV64I-MO-NEXT: PseudoRET
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, 0
$x11 = ORI $x11, 1023
Expand All @@ -76,13 +72,11 @@ body: |
; RV32I-MO-LABEL: name: func3
; RV32I-MO: liveins: $x10, $x11
; RV32I-MO-NEXT: {{ $}}
; RV32I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
; RV32I-MO-NEXT: PseudoRET
; RV32I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
; RV64I-MO-LABEL: name: func3
; RV64I-MO: liveins: $x10, $x11
; RV64I-MO-NEXT: {{ $}}
; RV64I-MO-NEXT: $x5 = PseudoCALLReg target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit-def $x5, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x10, implicit $x11
; RV64I-MO-NEXT: PseudoRET
; RV64I-MO-NEXT: PseudoTAIL target-flags(riscv-call) @OUTLINED_FUNCTION_0, implicit $x2, implicit-def $x10, implicit-def $x11, implicit-def $x12, implicit $x2, implicit $x10, implicit $x11
$x10 = ORI $x10, 1023
CFI_INSTRUCTION offset $x1, -12
$x11 = ORI $x11, 1023
Expand All @@ -96,11 +90,11 @@ body: |


# OUTLINED-LABEL: name: OUTLINED_FUNCTION_0
# OUTLINED: liveins: $x11, $x10, $x5
# OUTLINED: liveins: $x11, $x10
# OUTLINED-NEXT: {{ $}}
# OUTLINED-NEXT: $x10 = ORI $x10, 1023
# OUTLINED-NEXT: $x11 = ORI $x11, 1023
# OUTLINED-NEXT: $x12 = ADDI $x10, 17
# OUTLINED-NEXT: $x11 = AND $x12, $x11
# OUTLINED-NEXT: $x10 = SUB $x10, $x11
# OUTLINED-NEXT: $x0 = JALR $x5, 0
# OUTLINED-NEXT: PseudoRET
13 changes: 8 additions & 5 deletions llvm/test/CodeGen/RISCV/machine-outliner-leaf-descendants.ll
Original file line number Diff line number Diff line change
Expand Up @@ -94,25 +94,28 @@ define i32 @_Z2f6v() minsize {
; CHECK-BASELINE-NEXT: li a3, 0x4
; CHECK-BASELINE-NEXT: li a4, 0x5
; CHECK-BASELINE-NEXT: li a5, 0x6
; CHECK-BASELINE-NEXT: jr t0
; CHECK-BASELINE-NEXT: auipc t1, 0x0
; CHECK-BASELINE-NEXT: jr t1

; CHECK-BASELINE: <OUTLINED_FUNCTION_1>:
; CHECK-BASELINE-NEXT: li a0, 0x1
; CHECK-BASELINE-NEXT: li a1, 0x2
; CHECK-BASELINE-NEXT: li a2, 0x3
; CHECK-BASELINE-NEXT: li a3, 0x4
; CHECK-BASELINE-NEXT: li a4, 0x5
; CHECK-BASELINE-NEXT: li a5, 0x7
; CHECK-BASELINE-NEXT: jr t0
; CHECK-BASELINE-NEXT: li a5, 0x8
; CHECK-BASELINE-NEXT: auipc t1, 0x0
; CHECK-BASELINE-NEXT: jr t1

; CHECK-BASELINE: <OUTLINED_FUNCTION_2>:
; CHECK-BASELINE-NEXT: li a0, 0x1
; CHECK-BASELINE-NEXT: li a1, 0x2
; CHECK-BASELINE-NEXT: li a2, 0x3
; CHECK-BASELINE-NEXT: li a3, 0x4
; CHECK-BASELINE-NEXT: li a4, 0x5
; CHECK-BASELINE-NEXT: li a5, 0x8
; CHECK-BASELINE-NEXT: jr t0
; CHECK-BASELINE-NEXT: li a5, 0x7
; CHECK-BASELINE-NEXT: auipc t1, 0x0
; CHECK-BASELINE-NEXT: jr t1

; CHECK-LEAF-DESCENDANTS: <OUTLINED_FUNCTION_0>:
; CHECK-LEAF-DESCENDANTS-NEXT: li a0, 0x1
Expand Down
24 changes: 20 additions & 4 deletions llvm/test/CodeGen/RISCV/machine-outliner-patchable.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ define void @fentry0(i1 %a) nounwind "fentry-call"="true" {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: # FEntry call
; CHECK: # %bb.1:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
; CHECK-NEXT: call foo
; CHECK-LABEL: .LBB0_2:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
; CHECK-NEXT: call foo
entry:
br i1 %a, label %if.then, label %if.end
if.then:
Expand All @@ -27,7 +31,11 @@ define void @fentry1(i1 %a) nounwind "fentry-call"="true" {
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: # FEntry call
; CHECK: # %bb.1:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
; CHECK-NEXT: call foo
; CHECK-LABEL: .LBB1_2:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
; CHECK-NEXT: call foo
entry:
br i1 %a, label %if.then, label %if.end
if.then:
Expand All @@ -47,7 +55,11 @@ define void @patchable0(i1 %a) nounwind "patchable-function-entry"="2" {
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK: # %bb.1:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
; CHECK-NEXT: call foo
; CHECK-LABEL: .LBB2_2:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
; CHECK-NEXT: call foo
entry:
br i1 %a, label %if.then, label %if.end
if.then:
Expand All @@ -65,7 +77,11 @@ define void @patchable1(i1 %a) nounwind "patchable-function-entry"="2" {
; CHECK-NEXT: nop
; CHECK-NEXT: nop
; CHECK: # %bb.1:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_1
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB1:[0-9]+]]
; CHECK-NEXT: call foo
; CHECK-LABEL: .LBB3_2:
; CHECK-NEXT: call t0, OUTLINED_FUNCTION_[[BB2:[0-9]+]]
; CHECK-NEXT: call foo
entry:
br i1 %a, label %if.then, label %if.end
if.then:
Expand Down
Loading
Loading