@@ -2407,8 +2407,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     isTailCall = false;
 
   // For both the non-secure calls and the returns from a CMSE entry function,
-  // the function needs to do some extra work after the call, or before the
-  // return, respectively, thus it cannot end with a tail call
+  // the function needs to do some extra work after the call, or before the
+  // return, respectively, thus it cannot end with a tail call
   if (isCmseNSCall || AFI->isCmseNSEntryFunction())
     isTailCall = false;
 
@@ -2960,6 +2960,50 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
   Size = std::max<int>(Size - Excess, 0);
 }
 
+/// MatchingStackOffset - Return true if the given stack call argument is
+/// already available in the same position (relatively) of the caller's
+/// incoming argument stack.
+static
+bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
+                         MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
+                         const TargetInstrInfo *TII) {
+  unsigned Bytes = Arg.getValueSizeInBits() / 8;
+  int FI = std::numeric_limits<int>::max();
+  if (Arg.getOpcode() == ISD::CopyFromReg) {
+    Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
+    if (!VR.isVirtual())
+      return false;
+    MachineInstr *Def = MRI->getVRegDef(VR);
+    if (!Def)
+      return false;
+    if (!Flags.isByVal()) {
+      if (!TII->isLoadFromStackSlot(*Def, FI))
+        return false;
+    } else {
+      return false;
+    }
+  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
+    if (Flags.isByVal())
+      // ByVal argument is passed in as a pointer but it's now being
+      // dereferenced. e.g.
+      // define @foo(%struct.X* %A) {
+      //   tail call @bar(%struct.X* byval %A)
+      // }
+      return false;
+    SDValue Ptr = Ld->getBasePtr();
+    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
+    if (!FINode)
+      return false;
+    FI = FINode->getIndex();
+  } else
+    return false;
+
+  assert(FI != std::numeric_limits<int>::max());
+  if (!MFI.isFixedObjectIndex(FI))
+    return false;
+  return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
+}
+
 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
 /// for tail call optimization. Targets which want to do tail call
 /// optimization should implement this function. Note that this function also
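The reinstated MatchingStackOffset helper ultimately reduces to one comparison: the outgoing stack argument may reuse the caller's slot only if it lines up, in offset and size, with a fixed object in the caller's incoming-argument area. Below is a minimal standalone sketch of just that final check, with a hypothetical `FixedObject` record standing in for `MachineFrameInfo`; it is an illustration of the idea, not part of the commit.

```cpp
#include <cstdint>

// Hypothetical stand-in for one frame object; not an LLVM type.
struct FixedObject {
  int64_t Offset;  // byte offset of the caller's incoming stack argument
  uint64_t Size;   // size of that object in bytes
  bool IsFixed;    // true for fixed (incoming-argument) objects
};

// Sketch of the tail of MatchingStackOffset: the outgoing argument can reuse
// the caller's slot only if the slot is a fixed object with the same offset
// and the same size.
static bool matchesCallerSlot(const FixedObject &FO, int64_t ArgOffset,
                              uint64_t ArgBytes) {
  if (!FO.IsFixed)
    return false;
  return ArgOffset == FO.Offset && ArgBytes == FO.Size;
}

// e.g. matchesCallerSlot({/*Offset=*/0, /*Size=*/4, /*IsFixed=*/true}, 0, 4)
// yields true, so that argument needs no copy before the tail call.
```

The real helper additionally has to prove where the SDValue came from (a reload from a fixed stack slot, or a load through a frame index), which is what the CopyFromReg and LoadSDNode branches above handle.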
@@ -3001,10 +3045,8 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
     for (const CCValAssign &AL : ArgLocs)
       if (AL.isRegLoc())
         AddressRegisters.erase(AL.getLocReg());
-    if (AddressRegisters.empty()) {
-      LLVM_DEBUG(dbgs() << "false (no space for target address)\n");
+    if (AddressRegisters.empty())
       return false;
-    }
   }
 
   // Look for obvious safe cases to perform tail call optimization that do not
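The context lines above show the indirect-call check that survives this change: every register already claimed by an outgoing argument is erased from a small candidate set, and the tail call is rejected if nothing is left to hold the target address. A rough standalone illustration follows; the candidate set {r0-r3, r12} is an assumption about how the earlier, unshown code populates it.

```cpp
#include <set>
#include <vector>

int main() {
  // Assumed candidates for holding an indirect tail-call target (r0-r3, r12);
  // the real initialisation happens earlier in the function, outside this hunk.
  std::set<unsigned> AddressRegisters = {0, 1, 2, 3, 12};

  // Registers consumed by outgoing arguments (here r0-r3).
  std::vector<unsigned> ArgRegs = {0, 1, 2, 3};
  for (unsigned R : ArgRegs)
    AddressRegisters.erase(R);

  // r12 is still free, so an indirect tail call remains possible; had the set
  // become empty, the call could not be lowered as a tail call.
  return AddressRegisters.empty() ? 1 : 0;
}
```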
@@ -3013,26 +3055,18 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
   // Exception-handling functions need a special set of instructions to indicate
   // a return to the hardware. Tail-calling another function would probably
   // break this.
-  if (CallerF.hasFnAttribute("interrupt")) {
-    LLVM_DEBUG(dbgs() << "false (interrupt attribute)\n");
+  if (CallerF.hasFnAttribute("interrupt"))
     return false;
-  }
 
-  if (canGuaranteeTCO(CalleeCC,
-                      getTargetMachine().Options.GuaranteedTailCallOpt)) {
-    LLVM_DEBUG(dbgs() << (CalleeCC == CallerCC ? "true" : "false")
-               << " (guaranteed tail-call CC)\n");
+  if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
     return CalleeCC == CallerCC;
-  }
 
-  // Also avoid sibcall optimization if only one of caller or callee uses
-  // struct return semantics.
+  // Also avoid sibcall optimization if either caller or callee uses struct
+  // return semantics.
   bool isCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
   bool isCallerStructRet = MF.getFunction().hasStructRetAttr();
-  if (isCalleeStructRet != isCallerStructRet) {
-    LLVM_DEBUG(dbgs() << "false (struct-ret)\n");
+  if (isCalleeStructRet || isCallerStructRet)
     return false;
-  }
 
   // Externally-defined functions with weak linkage should not be
   // tail-called on ARM when the OS does not support dynamic
@@ -3045,11 +3079,8 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
     const GlobalValue *GV = G->getGlobal();
     const Triple &TT = getTargetMachine().getTargetTriple();
     if (GV->hasExternalWeakLinkage() &&
-        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
-         TT.isOSBinFormatMachO())) {
-      LLVM_DEBUG(dbgs() << "false (external weak linkage)\n");
+        (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
       return false;
-    }
   }
 
   // Check that the call results are passed in the same way.
@@ -3058,44 +3089,70 @@ bool ARMTargetLowering::IsEligibleForTailCallOptimization(
           getEffectiveCallingConv(CalleeCC, isVarArg),
           getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
           CCAssignFnForReturn(CalleeCC, isVarArg),
-          CCAssignFnForReturn(CallerCC, CallerF.isVarArg()))) {
-    LLVM_DEBUG(dbgs() << "false (incompatible results)\n");
+          CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
     return false;
-  }
   // The callee has to preserve all registers the caller needs to preserve.
   const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
   if (CalleeCC != CallerCC) {
     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
-    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) {
-      LLVM_DEBUG(dbgs() << "false (not all registers preserved)\n");
+    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
       return false;
-    }
   }
 
-  // If Caller's vararg argument has been split between registers and
+  // If Caller's vararg or byval argument has been split between registers and
   // stack, do not perform tail call, since part of the argument is in caller's
   // local frame.
   const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
-  if (CLI.IsVarArg && AFI_Caller->getArgRegsSaveSize()) {
-    LLVM_DEBUG(dbgs() << "false (vararg arg reg save area)\n");
+  if (AFI_Caller->getArgRegsSaveSize())
     return false;
-  }
 
   // If the callee takes no arguments then go on to check the results of the
   // call.
-  const MachineRegisterInfo &MRI = MF.getRegInfo();
-  if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) {
-    LLVM_DEBUG(dbgs() << "false (parameters in CSRs do not match)\n");
-    return false;
-  }
+  if (!Outs.empty()) {
+    if (CCInfo.getStackSize()) {
+      // Check if the arguments are already laid out in the right way as
+      // the caller's fixed stack objects.
+      MachineFrameInfo &MFI = MF.getFrameInfo();
+      const MachineRegisterInfo *MRI = &MF.getRegInfo();
+      const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
+           i != e;
+           ++i, ++realArgIdx) {
+        CCValAssign &VA = ArgLocs[i];
+        EVT RegVT = VA.getLocVT();
+        SDValue Arg = OutVals[realArgIdx];
+        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
+        if (VA.getLocInfo() == CCValAssign::Indirect)
+          return false;
+        if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
+          // f64 and vector types are split into multiple registers or
+          // register/stack-slot combinations. The types will not match
+          // the registers; give up on memory f64 refs until we figure
+          // out what to do about this.
+          if (!VA.isRegLoc())
+            return false;
+          if (!ArgLocs[++i].isRegLoc())
+            return false;
+          if (RegVT == MVT::v2f64) {
+            if (!ArgLocs[++i].isRegLoc())
+              return false;
+            if (!ArgLocs[++i].isRegLoc())
+              return false;
+          }
+        } else if (!VA.isRegLoc()) {
+          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
+                                   MFI, MRI, TII))
+            return false;
+        }
+      }
+    }
 
-  // If the stack arguments for this call do not fit into our own save area then
-  // the call cannot be made tail.
-  if (CCInfo.getStackSize() > AFI_Caller->getArgumentStackSize())
-    return false;
+    const MachineRegisterInfo &MRI = MF.getRegInfo();
+    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
+      return false;
+  }
 
-  LLVM_DEBUG(dbgs() << "true\n");
   return true;
 }
 
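One subtle case in the loop restored above is a custom-lowered f64 or v2f64 argument: it is only acceptable for a tail call if every piece landed in a register, which the code checks as two consecutive register locations for f64 and four for v2f64. The sketch below restates that rule in isolation, with a hypothetical `Loc` type standing in for `CCValAssign`; it is an illustrative aside, not LLVM code.

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-in for CCValAssign: all we track is whether a piece of
// the split argument was assigned to a register or to a stack slot.
struct Loc { bool IsReg; };

// A custom-lowered f64 needs 2 consecutive register locations, a v2f64 needs
// 4. If any piece spilled to the stack, the tail call must be rejected.
static bool customSplitStaysInRegs(const std::vector<Loc> &Locs, size_t Start,
                                   bool IsV2F64) {
  const size_t NumPieces = IsV2F64 ? 4 : 2;
  if (Start + NumPieces > Locs.size())
    return false;
  for (size_t I = 0; I < NumPieces; ++I)
    if (!Locs[Start + I].IsReg)
      return false;
  return true;
}
```

In the actual loop the index is advanced past the extra locations as they are verified, whereas this sketch only inspects them in place; the accept/reject decision is the same.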