diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index ec6367a803506..7ab8f3b8b40f5 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2961,50 +2961,6 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size, Size = std::max(Size - Excess, 0); } -/// MatchingStackOffset - Return true if the given stack call argument is -/// already available in the same position (relatively) of the caller's -/// incoming argument stack. -static -bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, - MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, - const TargetInstrInfo *TII) { - unsigned Bytes = Arg.getValueSizeInBits() / 8; - int FI = std::numeric_limits::max(); - if (Arg.getOpcode() == ISD::CopyFromReg) { - Register VR = cast(Arg.getOperand(1))->getReg(); - if (!VR.isVirtual()) - return false; - MachineInstr *Def = MRI->getVRegDef(VR); - if (!Def) - return false; - if (!Flags.isByVal()) { - if (!TII->isLoadFromStackSlot(*Def, FI)) - return false; - } else { - return false; - } - } else if (LoadSDNode *Ld = dyn_cast(Arg)) { - if (Flags.isByVal()) - // ByVal argument is passed in as a pointer but it's now being - // dereferenced. e.g. - // define @foo(%struct.X* %A) { - // tail call @bar(%struct.X* byval %A) - // } - return false; - SDValue Ptr = Ld->getBasePtr(); - FrameIndexSDNode *FINode = dyn_cast(Ptr); - if (!FINode) - return false; - FI = FINode->getIndex(); - } else - return false; - - assert(FI != std::numeric_limits::max()); - if (!MFI.isFixedObjectIndex(FI)) - return false; - return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI); -} - /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. Targets which want to do tail call /// optimization should implement this function. Note that this function also @@ -3046,8 +3002,10 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( for (const CCValAssign &AL : ArgLocs) if (AL.isRegLoc()) AddressRegisters.erase(AL.getLocReg()); - if (AddressRegisters.empty()) + if (AddressRegisters.empty()) { + LLVM_DEBUG(dbgs() << "false (no space for target address)\n"); return false; + } } // Look for obvious safe cases to perform tail call optimization that do not @@ -3056,18 +3014,26 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( // Exception-handling functions need a special set of instructions to indicate // a return to the hardware. Tail-calling another function would probably // break this. - if (CallerF.hasFnAttribute("interrupt")) + if (CallerF.hasFnAttribute("interrupt")) { + LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n"); return false; + } - if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt)) + if (canGuaranteeTCO(CalleeCC, + getTargetMachine().Options.GuaranteedTailCallOpt)) { + LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false") + << " (guaranteed tail-call CC)\n"); return CalleeCC == CallerCC; + } // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet(); bool isCallerStructRet = MF.getFunction().hasStructRetAttr(); - if (isCalleeStructRet || isCallerStructRet) + if (isCalleeStructRet != isCallerStructRet) { + LLVM_DEBUG(dbgs() << "false (mismatched sret)\n"); return false; + } // Externally-defined functions with weak linkage should not be // tail-called on ARM when the OS does not support dynamic @@ -3080,8 +3046,11 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( const GlobalValue *GV = G->getGlobal(); const Triple &TT = getTargetMachine().getTargetTriple(); if (GV->hasExternalWeakLinkage() && - (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO())) + (!TT.isOSWindows() || TT.isOSBinFormatELF() || + TT.isOSBinFormatMachO())) { + LLVM_DEBUG(dbgs() << "false (external weak linkage)\n"); return false; + } } // Check that the call results are passed in the same way. @@ -3110,50 +3079,17 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization( // If the callee takes no arguments then go on to check the results of the // call. - if (!Outs.empty()) { - if (CCInfo.getStackSize()) { - // Check if the arguments are already laid out in the right way as - // the caller's fixed stack objects. - MachineFrameInfo &MFI = MF.getFrameInfo(); - const MachineRegisterInfo *MRI = &MF.getRegInfo(); - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); - i != e; - ++i, ++realArgIdx) { - CCValAssign &VA = ArgLocs[i]; - EVT RegVT = VA.getLocVT(); - SDValue Arg = OutVals[realArgIdx]; - ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; - if (VA.getLocInfo() == CCValAssign::Indirect) - return false; - if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) { - // f64 and vector types are split into multiple registers or - // register/stack-slot combinations. The types will not match - // the registers; give up on memory f64 refs until we figure - // out what to do about this. - if (!VA.isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - if (RegVT == MVT::v2f64) { - if (!ArgLocs[++i].isRegLoc()) - return false; - if (!ArgLocs[++i].isRegLoc()) - return false; - } - } else if (!VA.isRegLoc()) { - if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, - MFI, MRI, TII)) - return false; - } - } - } - - const MachineRegisterInfo &MRI = MF.getRegInfo(); - if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) - return false; + const MachineRegisterInfo &MRI = MF.getRegInfo(); + if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) { + LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n"); + return false; } + // If the stack arguments for this call do not fit into our own save area then + // the call cannot be made tail. + if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize()) + return false; + return true; } diff --git a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll index 4996cc8ecbf02..9e400364a6099 100644 --- a/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll +++ b/llvm/test/CodeGen/ARM/fp-arg-shuffle.ll @@ -1,9 +1,30 @@ ; RUN: llc -mtriple=arm-eabi -mattr=+neon -float-abi=soft %s -o - | FileCheck %s ; CHECK: function1 -; CHECK-NOT: vmov define double @function1(double %a, double %b, double %c, double %d, double %e, double %f) nounwind noinline ssp { entry: +; CHECK-LABEL: function1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: vldr d16, [sp, #40] +; CHECK-NEXT: vldr d17, [sp, #32] +; CHECK-NEXT: vmov r12, lr, d16 +; CHECK-NEXT: vldr d16, [sp, #16] +; CHECK-NEXT: vmov r4, r5, d17 +; CHECK-NEXT: vldr d17, [sp, #24] +; CHECK-NEXT: str r3, [sp, #36] +; CHECK-NEXT: str r2, [sp, #32] +; CHECK-NEXT: str r1, [sp, #44] +; CHECK-NEXT: str r0, [sp, #40] +; CHECK-NEXT: vstr d17, [sp, #16] +; CHECK-NEXT: vstr d16, [sp, #24] +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr +; CHECK-NEXT: mov r2, r4 +; CHECK-NEXT: mov r3, r5 +; CHECK-NEXT: pop {r4, r5, r11, lr} +; CHECK-NEXT: b function2 %call = tail call double @function2(double %f, double %e, double %d, double %c, double %b, double %a) nounwind ret double %call } diff --git a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll index 6fc56967bc7aa..65aff46658fd1 100644 --- a/llvm/test/CodeGen/ARM/fp16-vector-argument.ll +++ b/llvm/test/CodeGen/ARM/fp16-vector-argument.ll @@ -145,26 +145,21 @@ entry: define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x half>, <8 x half>) { ; SOFT-LABEL: many_args_test: ; SOFT: @ %bb.0: @ %entry -; SOFT-NEXT: push {r11, lr} -; SOFT-NEXT: sub sp, sp, #32 -; SOFT-NEXT: add r12, sp, #80 +; SOFT-NEXT: add r12, sp, #40 ; SOFT-NEXT: vld1.64 {d16, d17}, [r12] -; SOFT-NEXT: add r12, sp, #48 +; SOFT-NEXT: add r12, sp, #8 ; SOFT-NEXT: vabs.f16 q8, q8 ; SOFT-NEXT: vld1.64 {d18, d19}, [r12] -; SOFT-NEXT: add r12, sp, #64 +; SOFT-NEXT: add r12, sp, #24 ; SOFT-NEXT: vadd.f16 q8, q8, q9 ; SOFT-NEXT: vld1.64 {d18, d19}, [r12] ; SOFT-NEXT: add r12, sp, #16 ; SOFT-NEXT: vmul.f16 q8, q9, q8 ; SOFT-NEXT: vst1.64 {d16, d17}, [r12] -; SOFT-NEXT: mov r12, sp -; SOFT-NEXT: vldr d16, [sp, #40] -; SOFT-NEXT: vst1.16 {d16}, [r12:64]! -; SOFT-NEXT: str r3, [r12] -; SOFT-NEXT: bl use -; SOFT-NEXT: add sp, sp, #32 -; SOFT-NEXT: pop {r11, pc} +; SOFT-NEXT: vldr d16, [sp] +; SOFT-NEXT: vstr d16, [sp] +; SOFT-NEXT: str r3, [sp, #8] +; SOFT-NEXT: b use ; ; HARD-LABEL: many_args_test: ; HARD: @ %bb.0: @ %entry @@ -177,33 +172,25 @@ define void @many_args_test(double, float, i16, <4 x half>, <8 x half>, <8 x hal ; ; SOFTEB-LABEL: many_args_test: ; SOFTEB: @ %bb.0: @ %entry -; SOFTEB-NEXT: .save {r11, lr} -; SOFTEB-NEXT: push {r11, lr} -; SOFTEB-NEXT: .pad #32 -; SOFTEB-NEXT: sub sp, sp, #32 -; SOFTEB-NEXT: add r12, sp, #80 -; SOFTEB-NEXT: mov lr, sp +; SOFTEB-NEXT: add r12, sp, #40 ; SOFTEB-NEXT: vld1.64 {d16, d17}, [r12] -; SOFTEB-NEXT: add r12, sp, #48 +; SOFTEB-NEXT: add r12, sp, #8 ; SOFTEB-NEXT: vrev64.16 q8, q8 ; SOFTEB-NEXT: vabs.f16 q8, q8 ; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12] -; SOFTEB-NEXT: add r12, sp, #64 +; SOFTEB-NEXT: add r12, sp, #24 ; SOFTEB-NEXT: vrev64.16 q9, q9 ; SOFTEB-NEXT: vadd.f16 q8, q8, q9 ; SOFTEB-NEXT: vld1.64 {d18, d19}, [r12] ; SOFTEB-NEXT: add r12, sp, #16 ; SOFTEB-NEXT: vrev64.16 q9, q9 ; SOFTEB-NEXT: vmul.f16 q8, q9, q8 -; SOFTEB-NEXT: vldr d18, [sp, #40] -; SOFTEB-NEXT: vrev64.16 d18, d18 -; SOFTEB-NEXT: vst1.16 {d18}, [lr:64]! -; SOFTEB-NEXT: str r3, [lr] +; SOFTEB-NEXT: vldr d18, [sp] ; SOFTEB-NEXT: vrev64.16 q8, q8 ; SOFTEB-NEXT: vst1.64 {d16, d17}, [r12] -; SOFTEB-NEXT: bl use -; SOFTEB-NEXT: add sp, sp, #32 -; SOFTEB-NEXT: pop {r11, pc} +; SOFTEB-NEXT: vstr d18, [sp] +; SOFTEB-NEXT: str r3, [sp, #8] +; SOFTEB-NEXT: b use ; ; HARDEB-LABEL: many_args_test: ; HARDEB: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/ARM/musttail.ll b/llvm/test/CodeGen/ARM/musttail.ll new file mode 100644 index 0000000000000..145e95bdf47a7 --- /dev/null +++ b/llvm/test/CodeGen/ARM/musttail.ll @@ -0,0 +1,43 @@ +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s + +; The repro example from https://github.com/llvm/llvm-project/issues/57069#issuecomment-1212754850 +; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind sspstrong willreturn memory(none) +define hidden noundef i32 @many_args_callee(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5) local_unnamed_addr #0 { + %7 = add nsw i32 %1, %0 + %8 = add nsw i32 %7, %2 + %9 = add nsw i32 %8, %3 + %10 = add nsw i32 %9, %4 + %11 = add nsw i32 %10, %5 + ret i32 %11 +} + +; Function Attrs: mustprogress nofree norecurse nosync nounwind sspstrong willreturn memory(none) +define hidden noundef i32 @many_args(i32 noundef %0, i32 noundef %1, i32 noundef %2, i32 noundef %3, i32 noundef %4, i32 noundef %5) local_unnamed_addr #1 { +; CHECK: b many_args_callee + %7 = musttail call noundef i32 @many_args_callee(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6) + ret i32 %7 +} + +; Test with sret +; Function Attrs: optsize +declare dso_local void @sret_callee(ptr dead_on_unwind writable sret({ double, double }) align 8, i16 noundef signext) local_unnamed_addr #1 + +; Function Attrs: mustprogress optsize +define dso_local void @sret_caller(ptr dead_on_unwind noalias writable sret({ double, double }) align 8 %agg.result, i16 noundef signext %P0) local_unnamed_addr #0 { +entry: +; CHECK: b sret_callee + musttail call void @sret_callee(ptr dead_on_unwind writable sret({ double, double }) align 8 %agg.result, i16 noundef signext 20391) #2 + ret void +} + +%struct.Large = type { [60 x i32] } + +; Function Attrs: mustprogress noinline optnone +define dso_local void @large_caller(i64 noundef %0, i64 noundef %1, %struct.Large* noundef byval(%struct.Large) align 4 %2, %struct.Large* noundef byval(%struct.Large) align 4 %3) #0 { +entry: +; CHECK: b large_callee + musttail call void @large_callee(i64 noundef %0, i64 noundef %1, %struct.Large* noundef byval(%struct.Large) align 4 %2, %struct.Large* noundef byval(%struct.Large) align 4 %3) + ret void +} + +declare dso_local void @large_callee(i64 noundef, i64 noundef, %struct.Large* noundef byval(%struct.Large) align 4, %struct.Large* noundef byval(%struct.Large) align 4) #1 \ No newline at end of file