diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index d38a7c54c8cea..df30148b78b65 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -2635,22 +2635,6 @@ bool SelectionDAG::expandMultipleResultFPLibCall(
     Results.push_back(LoadResult);
   }
 
-  if (CallRetResNo && !Node->hasAnyUseOfValue(*CallRetResNo)) {
-    // FIXME: Find a way to avoid updating the root. This is needed for x86,
-    // which uses a floating-point stack. If (for example) the node to be
-    // expanded has two results one floating-point which is returned by the
-    // call, and one integer result, returned via an output pointer. If only the
-    // integer result is used then the `CopyFromReg` for the FP result may be
-    // optimized out. This prevents an FP stack pop from being emitted for it.
-    // Setting the root like this ensures there will be a use of the
-    // `CopyFromReg` chain, and ensures the FP pop will be emitted.
-    SDValue NewRoot =
-        getNode(ISD::TokenFactor, DL, MVT::Other, getRoot(), CallChain);
-    setRoot(NewRoot);
-    // Ensure the new root is reachable from the results.
-    Results[0] = getMergeValues({Results[0], NewRoot}, DL);
-  }
-
   return true;
 }
 
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 84bcdae520885..a7a0e84ba2b60 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -6717,6 +6717,17 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     ReplaceNode(Node, Res);
     return;
   }
+  case X86ISD::POP_FROM_X87_REG: {
+    SDValue Chain = Node->getOperand(0);
+    Register Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
+    SDValue Glue;
+    if (Node->getNumValues() == 3)
+      Glue = Node->getOperand(2);
+    SDValue Copy =
+        CurDAG->getCopyFromReg(Chain, dl, Reg, Node->getValueType(0), Glue);
+    ReplaceNode(Node, Copy.getNode());
+    return;
+  }
   }
 
   SelectCode(Node);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7fb5157c00d84..d62228c413196 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -35114,6 +35114,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(CVTTP2SIS_SAE)
   NODE_NAME_CASE(CVTTP2UIS)
   NODE_NAME_CASE(MCVTTP2UIS)
+  NODE_NAME_CASE(POP_FROM_X87_REG)
   }
   return nullptr;
 #undef NODE_NAME_CASE
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index fe79fefeed631..4a2b35e9efe7c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -81,6 +81,15 @@ namespace llvm {
     // marker instruction.
     CALL_RVMARKER,
 
+    /// The same as ISD::CopyFromReg except that this node makes it explicit
+    /// that it may lower to an x87 FPU stack pop. Optimizations should be more
+    /// cautious when handling this node than a normal CopyFromReg to avoid
+    /// removing a required FPU stack pop. A key requirement is that
+    /// optimizations should not rewrite any user of a chain that contains a
+    /// POP_FROM_X87_REG to use a chain from a point earlier than the
+    /// POP_FROM_X87_REG, as that may remove a required FPU stack pop.
+    POP_FROM_X87_REG,
+
     /// X86 compare and logical compare instructions.
     CMP,
     FCMP,
diff --git a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
index ee4bb758102f4..80b4aeeda1e00 100644
--- a/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
+++ b/llvm/lib/Target/X86/X86ISelLoweringCall.cpp
@@ -1095,6 +1095,15 @@ static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
   return DAG.getBitcast(ValVT, ValReturned);
 }
 
+static SDValue getPopFromX87Reg(SelectionDAG &DAG, SDValue Chain,
+                                const SDLoc &dl, Register Reg, EVT VT,
+                                SDValue Glue) {
+  SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue);
+  SDValue Ops[] = {Chain, DAG.getRegister(Reg, VT), Glue};
+  return DAG.getNode(X86ISD::POP_FROM_X87_REG, dl, VTs,
+                     ArrayRef(Ops, Glue.getNode() ? 3 : 2));
+}
+
 /// Lower the result values of a call into the
 /// appropriate copies out of appropriate physical registers.
 ///
@@ -1145,8 +1154,8 @@ SDValue X86TargetLowering::LowerCallResult(
     // If we prefer to use the value in xmm registers, copy it out as f80 and
     // use a truncate to move it from fp stack reg to xmm reg.
     bool RoundAfterCopy = false;
-    if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
-        isScalarFPTypeInSSEReg(VA.getValVT())) {
+    bool X87Result = VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1;
+    if (X87Result && isScalarFPTypeInSSEReg(VA.getValVT())) {
       if (!Subtarget.hasX87())
         report_fatal_error("X87 register return with X87 disabled");
       CopyVT = MVT::f80;
@@ -1160,8 +1169,12 @@ SDValue X86TargetLowering::LowerCallResult(
       Val = getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget,
                              &InGlue);
     } else {
-      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
-                  .getValue(1);
+      Chain =
+          X87Result
+              ? getPopFromX87Reg(DAG, Chain, dl, VA.getLocReg(), CopyVT, InGlue)
+                    .getValue(1)
+              : DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
+                    .getValue(1);
       Val = Chain.getValue(0);
       InGlue = Chain.getValue(2);
     }
diff --git a/llvm/test/CodeGen/PowerPC/llvm.modf.ll b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
index 69e3b22c7352c..1b137c786cc91 100644
--- a/llvm/test/CodeGen/PowerPC/llvm.modf.ll
+++ b/llvm/test/CodeGen/PowerPC/llvm.modf.ll
@@ -328,3 +328,108 @@ define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128(ppc_fp128 %a) {
   %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
   ret { ppc_fp128, ppc_fp128 } %result
 }
+
+define ppc_fp128 @test_modf_ppcf128_only_use_integral(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_integral:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f1, 32(r1)
+; CHECK-NEXT:    lfd f2, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+  ret ppc_fp128 %result.1
+}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_fractional(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_fractional:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+  ret ppc_fp128 %result.1
+}
+
+define { ppc_fp128, ppc_fp128 } @test_modf_ppcf128_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f3, 32(r1)
+; CHECK-NEXT:    lfd f4, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  ret { ppc_fp128, ppc_fp128 } %result
+}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_integral_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_integral_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    lfd f1, 32(r1)
+; CHECK-NEXT:    lfd f2, 40(r1)
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 1
+  ret ppc_fp128 %result.1
+}
+
+define ppc_fp128 @test_modf_ppcf128_only_use_fractional_tail_call(ppc_fp128 %a) {
+; CHECK-LABEL: test_modf_ppcf128_only_use_fractional_tail_call:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mflr r0
+; CHECK-NEXT:    stdu r1, -48(r1)
+; CHECK-NEXT:    std r0, 64(r1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    .cfi_offset lr, 16
+; CHECK-NEXT:    addi r5, r1, 32
+; CHECK-NEXT:    bl modfl
+; CHECK-NEXT:    nop
+; CHECK-NEXT:    addi r1, r1, 48
+; CHECK-NEXT:    ld r0, 16(r1)
+; CHECK-NEXT:    mtlr r0
+; CHECK-NEXT:    blr
+  %result = tail call { ppc_fp128, ppc_fp128 } @llvm.modf.ppcf128(ppc_fp128 %a)
+  %result.1 = extractvalue { ppc_fp128, ppc_fp128 } %result, 0
+  ret ppc_fp128 %result.1
+}
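
For reference, the failure mode the removed SelectionDAG root-update worked around is x86-specific: modfl returns the fractional part in an x87 stack register, so when only the integral result (returned through the output pointer) is used, nothing else keeps the copy out of the FP stack alive, and the required fstp could previously be dropped. A reproducer along the following lines would exercise the new POP_FROM_X87_REG path (a hypothetical sketch, not part of this diff; it assumes the generic llvm.modf intrinsic on an x86 target, with result index 1 being the integral part as in the PowerPC tests above):

; Hypothetical x86 sketch: only the integral result is used, so the x87
; return of the fractional part is otherwise dead. POP_FROM_X87_REG
; ensures the FP stack pop for it is still emitted.
define x86_fp80 @test_modf_f80_only_use_integral(x86_fp80 %a) {
  %result = call { x86_fp80, x86_fp80 } @llvm.modf.f80(x86_fp80 %a)
  %result.1 = extractvalue { x86_fp80, x86_fp80 } %result, 1
  ret x86_fp80 %result.1
}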