Skip to content

Commit 2945d99

Browse files
committed
riscv: improve musttail based on loongarch code
1 parent 2865ca1 commit 2945d99

File tree

3 files changed

+73
-64
lines changed

3 files changed

+73
-64
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 33 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -23420,6 +23420,7 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
2342023420
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2342123421

2342223422
MachineFunction &MF = DAG.getMachineFunction();
23423+
RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
2342323424

2342423425
switch (CallConv) {
2342523426
default:
@@ -23608,6 +23609,8 @@ SDValue RISCVTargetLowering::LowerFormalArguments(
2360823609
RVFI->setVarArgsSaveSize(VarArgsSaveSize);
2360923610
}
2361023611

23612+
RVFI->setArgumentStackSize(CCInfo.getStackSize());
23613+
2361123614
// All stores are grouped in one node to allow the matching between
2361223615
// the size of Ins and InVals. This only happens for vararg functions.
2361323616
if (!OutChains.empty()) {
@@ -23629,6 +23632,7 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2362923632
auto &Outs = CLI.Outs;
2363023633
auto &Caller = MF.getFunction();
2363123634
auto CallerCC = Caller.getCallingConv();
23635+
auto *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
2363223636

2363323637
// Exception-handling functions need a special set of instructions to
2363423638
// indicate a return to the hardware. Tail-calling another function would
@@ -23638,8 +23642,9 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2363823642
if (Caller.hasFnAttribute("interrupt"))
2363923643
return false;
2364023644

23641-
// Do not tail call opt if the stack is used to pass parameters.
23642-
if (CCInfo.getStackSize() != 0)
23645+
// If the stack arguments for this call do not fit into our own save area then
23646+
// the call cannot be made tail.
23647+
if (CCInfo.getStackSize() > RVFI->getArgumentStackSize())
2364323648
return false;
2364423649

2364523650
// Do not tail call opt if any parameters need to be passed indirectly.
@@ -23658,7 +23663,7 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2365823663
// semantics.
2365923664
auto IsCallerStructRet = Caller.hasStructRetAttr();
2366023665
auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
23661-
if (IsCallerStructRet || IsCalleeStructRet)
23666+
if (IsCallerStructRet != IsCalleeStructRet)
2366223667
return false;
2366323668

2366423669
// The callee has to preserve all registers the caller needs to preserve.
@@ -23670,12 +23675,12 @@ bool RISCVTargetLowering::isEligibleForTailCallOptimization(
2367023675
return false;
2367123676
}
2367223677

23673-
// Byval parameters hand the function a pointer directly into the stack area
23674-
// we want to reuse during a tail call. Working around this *is* possible
23675-
// but less efficient and uglier in LowerCall.
23676-
for (auto &Arg : Outs)
23677-
if (Arg.Flags.isByVal())
23678-
return false;
23678+
// Arguments passed to the callee in callee-saved registers must be the same
23679+
// values this function received in them; otherwise the call cannot be tail.
23680+
const MachineRegisterInfo &MRI = MF.getRegInfo();
23681+
const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
23682+
if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
23683+
return false;
2367923684

2368023685
return true;
2368123686
}
@@ -23871,20 +23876,32 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
2387123876
CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), i);
2387223877
} else {
2387323878
assert(VA.isMemLoc() && "Argument not register or memory");
23874-
assert(!IsTailCall && "Tail call not allowed if stack is used "
23875-
"for passing parameters");
23879+
SDValue DstAddr;
23880+
MachinePointerInfo DstInfo;
23881+
int32_t Offset = VA.getLocMemOffset();
2387623882

2387723883
// Work out the address of the stack slot.
2387823884
if (!StackPtr.getNode())
2387923885
StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
23880-
SDValue Address =
23881-
DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
23882-
DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
23886+
23887+
if (IsTailCall) {
23888+
unsigned OpSize = (VA.getValVT().getSizeInBits() + 7) / 8;
23889+
int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
23890+
DstAddr = DAG.getFrameIndex(FI, PtrVT);
23891+
DstInfo = MachinePointerInfo::getFixedStack(MF, FI);
23892+
// Make sure any stack arguments overlapping with where we're storing
23893+
// are loaded before this eventual operation. Otherwise they'll be
23894+
// clobbered.
23895+
Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
23896+
} else {
23897+
SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
23898+
DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
23899+
DstInfo = MachinePointerInfo::getStack(MF, Offset);
23900+
}
2388323901

2388423902
// Emit the store.
2388523903
MemOpChains.push_back(
23886-
DAG.getStore(Chain, DL, ArgValue, Address,
23887-
MachinePointerInfo::getStack(MF, VA.getLocMemOffset())));
23904+
DAG.getStore(Chain, DL, ArgValue, DstAddr, DstInfo));
2388823905
}
2388923906
}
2389023907

llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
6565
uint64_t RVVPadding = 0;
6666
/// Size of stack frame to save callee saved registers
6767
unsigned CalleeSavedStackSize = 0;
68+
69+
/// ArgumentStackSize - number of bytes on stack consumed by the arguments
70+
/// being passed on the stack
71+
unsigned ArgumentStackSize = 0;
72+
6873
/// Is there any vector argument or return?
6974
bool IsVectorCall = false;
7075

@@ -142,6 +147,9 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
142147
/// Returns the recorded size of the stack area used to save callee-saved
/// registers.
unsigned getCalleeSavedStackSize() const {
  return CalleeSavedStackSize;
}
143148
/// Records the size of the stack area used to save callee-saved registers.
void setCalleeSavedStackSize(unsigned Size) {
  CalleeSavedStackSize = Size;
}
144149

150+
/// Returns the recorded number of bytes of stack consumed by arguments that
/// are passed on the stack.
unsigned getArgumentStackSize() const {
  return ArgumentStackSize;
}
151+
/// Records the number of bytes of stack consumed by arguments that are passed
/// on the stack.
void setArgumentStackSize(unsigned Size) { ArgumentStackSize = Size; }
152+
145153
/// Kind of push/pop handling, as returned by getPushPopKind().
/// NOTE(review): the enumerator names suggest "no push/pop", the standard
/// Zcmp extension, and the vendor Xqccmp extension respectively — confirm.
enum class PushPopKind { None = 0, StdExtZcmp, VendorXqccmp };
146154

147155
PushPopKind getPushPopKind(const MachineFunction &MF) const;

llvm/test/CodeGen/RISCV/tail-calls.ll

Lines changed: 32 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -204,49 +204,39 @@ declare i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %
204204
define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n) nounwind {
205205
; CHECK-LABEL: caller_args:
206206
; CHECK: # %bb.0: # %entry
207-
; CHECK-NEXT: addi sp, sp, -32
208-
; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
209-
; CHECK-NEXT: lw t0, 32(sp)
210-
; CHECK-NEXT: lw t1, 36(sp)
211-
; CHECK-NEXT: lw t2, 40(sp)
212-
; CHECK-NEXT: lw t3, 44(sp)
213-
; CHECK-NEXT: lw t4, 48(sp)
214-
; CHECK-NEXT: lw t5, 52(sp)
215-
; CHECK-NEXT: sw t4, 16(sp)
216-
; CHECK-NEXT: sw t5, 20(sp)
207+
; CHECK-NEXT: lw t0, 0(sp)
208+
; CHECK-NEXT: lw t1, 20(sp)
209+
; CHECK-NEXT: lw t2, 4(sp)
210+
; CHECK-NEXT: lw t3, 8(sp)
211+
; CHECK-NEXT: lw t4, 12(sp)
212+
; CHECK-NEXT: lw t5, 16(sp)
213+
; CHECK-NEXT: sw t2, 4(sp)
214+
; CHECK-NEXT: sw t3, 8(sp)
215+
; CHECK-NEXT: sw t4, 12(sp)
216+
; CHECK-NEXT: sw t5, 16(sp)
217+
; CHECK-NEXT: sw t1, 20(sp)
217218
; CHECK-NEXT: sw t0, 0(sp)
218-
; CHECK-NEXT: sw t1, 4(sp)
219-
; CHECK-NEXT: sw t2, 8(sp)
220-
; CHECK-NEXT: sw t3, 12(sp)
221-
; CHECK-NEXT: call callee_args
222-
; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
223-
; CHECK-NEXT: addi sp, sp, 32
224-
; CHECK-NEXT: ret
219+
; CHECK-NEXT: tail callee_args
225220
;
226221
; CHECK-LARGE-ZICFILP-LABEL: caller_args:
227222
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
228223
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
229-
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32
230-
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
231-
; CHECK-LARGE-ZICFILP-NEXT: lw t0, 32(sp)
232-
; CHECK-LARGE-ZICFILP-NEXT: lw t1, 36(sp)
233-
; CHECK-LARGE-ZICFILP-NEXT: lw t3, 40(sp)
234-
; CHECK-LARGE-ZICFILP-NEXT: lw t4, 44(sp)
235-
; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp)
236-
; CHECK-LARGE-ZICFILP-NEXT: lw t5, 52(sp)
237-
; CHECK-LARGE-ZICFILP-NEXT: sw t2, 16(sp)
238-
; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp)
224+
; CHECK-LARGE-ZICFILP-NEXT: lw t0, 0(sp)
225+
; CHECK-LARGE-ZICFILP-NEXT: lw t1, 20(sp)
226+
; CHECK-LARGE-ZICFILP-NEXT: lw t2, 4(sp)
227+
; CHECK-LARGE-ZICFILP-NEXT: lw t3, 16(sp)
228+
; CHECK-LARGE-ZICFILP-NEXT: lw t4, 12(sp)
229+
; CHECK-LARGE-ZICFILP-NEXT: lw t5, 8(sp)
230+
; CHECK-LARGE-ZICFILP-NEXT: sw t2, 4(sp)
239231
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8:
240232
; CHECK-LARGE-ZICFILP-NEXT: auipc t2, %pcrel_hi(.LCPI6_0)
241233
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi8)(t2)
242-
; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp)
243-
; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp)
244-
; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp)
234+
; CHECK-LARGE-ZICFILP-NEXT: sw t5, 8(sp)
245235
; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp)
246-
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
247-
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
248-
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32
249-
; CHECK-LARGE-ZICFILP-NEXT: ret
236+
; CHECK-LARGE-ZICFILP-NEXT: sw t3, 16(sp)
237+
; CHECK-LARGE-ZICFILP-NEXT: sw t1, 20(sp)
238+
; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp)
239+
; CHECK-LARGE-ZICFILP-NEXT: jr t2
250240
entry:
251241
%r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
252242
ret i32 %r
@@ -410,30 +400,24 @@ define i32 @caller_byval() nounwind {
410400
; CHECK-LABEL: caller_byval:
411401
; CHECK: # %bb.0: # %entry
412402
; CHECK-NEXT: addi sp, sp, -16
413-
; CHECK-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
414-
; CHECK-NEXT: lw a0, 8(sp)
415-
; CHECK-NEXT: sw a0, 4(sp)
416-
; CHECK-NEXT: addi a0, sp, 4
417-
; CHECK-NEXT: call callee_byval
418-
; CHECK-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
403+
; CHECK-NEXT: lw a1, 12(sp)
404+
; CHECK-NEXT: addi a0, sp, 8
405+
; CHECK-NEXT: sw a1, 8(sp)
419406
; CHECK-NEXT: addi sp, sp, 16
420-
; CHECK-NEXT: ret
407+
; CHECK-NEXT: tail callee_byval
421408
;
422409
; CHECK-LARGE-ZICFILP-LABEL: caller_byval:
423410
; CHECK-LARGE-ZICFILP: # %bb.0: # %entry
424411
; CHECK-LARGE-ZICFILP-NEXT: lpad 0
425412
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16
426-
; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
427-
; CHECK-LARGE-ZICFILP-NEXT: lw a0, 8(sp)
428-
; CHECK-LARGE-ZICFILP-NEXT: sw a0, 4(sp)
413+
; CHECK-LARGE-ZICFILP-NEXT: lw a1, 12(sp)
429414
; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi12:
430415
; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI10_0)
431416
; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi12)(a0)
432-
; CHECK-LARGE-ZICFILP-NEXT: addi a0, sp, 4
433-
; CHECK-LARGE-ZICFILP-NEXT: jalr t2
434-
; CHECK-LARGE-ZICFILP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
417+
; CHECK-LARGE-ZICFILP-NEXT: addi a0, sp, 8
418+
; CHECK-LARGE-ZICFILP-NEXT: sw a1, 8(sp)
435419
; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 16
436-
; CHECK-LARGE-ZICFILP-NEXT: ret
420+
; CHECK-LARGE-ZICFILP-NEXT: jr t2
437421
entry:
438422
%a = alloca ptr
439423
%r = tail call i32 @callee_byval(ptr byval(ptr) %a)

0 commit comments

Comments
 (0)