@@ -890,6 +890,12 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
890890 LD->getExtensionType () != ISD::NON_EXTLOAD)
891891 return false ;
892892
893+ // If the load's outgoing chain has more than one use, we can't (currently)
894+ // move the load since we'd most likely create a loop. TODO: Maybe it could
895+ // work if moveBelowOrigChain() updated *all* the chain users.
896+ if (!Callee.getValue (1 ).hasOneUse ())
897+ return false ;
898+
893899 // Now let's find the callseq_start.
894900 while (HasCallSeq && Chain.getOpcode () != ISD::CALLSEQ_START) {
895901 if (!Chain.hasOneUse ())
@@ -913,11 +919,13 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
913919 Callee.getValue (1 ).hasOneUse ())
914920 return true ;
915921
916- // Look past CopyToReg's.
917- if (Chain.getOperand (0 ).getOpcode () == ISD::CopyToReg) {
922+ // Look past CopyToRegs. We only walk one path, so the chain mustn't branch.
923+ if (Chain.getOperand (0 ).getOpcode () == ISD::CopyToReg &&
924+ Chain.getOperand (0 ).getValue (0 ).hasOneUse ()) {
918925 Chain = Chain.getOperand (0 );
919926 continue ;
920927 }
928+
921929 return false ;
922930 }
923931}
@@ -1362,6 +1370,22 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
13621370 (N->getOpcode () == X86ISD::TC_RETURN &&
13631371 (Subtarget->is64Bit () ||
13641372 !getTargetMachine ().isPositionIndependent ())))) {
1373+
1374+ if (N->getOpcode () == X86ISD::TC_RETURN) {
1375+ // There needs to be enough non-callee-saved GPRs available to compute
1376+ // the load address if folded into the tailcall. See how the
1377+ // X86tcret_6regs and X86tcret_1reg classes are used and defined.
1378+ unsigned NumRegs = 0 ;
1379+ for (unsigned I = 3 , E = N->getNumOperands (); I != E; ++I) {
1380+ if (isa<RegisterSDNode>(N->getOperand (I)))
1381+ ++NumRegs;
1382+ }
1383+ if (!Subtarget->is64Bit () && NumRegs > 1 )
1384+ continue ;
1385+ if (NumRegs > 6 )
1386+ continue ;
1387+ }
1388+
13651389 // / Also try moving call address load from outside callseq_start to just
13661390 // / before the call to allow it to be folded.
13671391 // /
0 commit comments