Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions llvm/include/llvm/CodeGen/SelectionDAG.h
Original file line number Diff line number Diff line change
Expand Up @@ -1313,7 +1313,7 @@ class SelectionDAG {
/// ISD::CondCode instead of an SDValue.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS,
ISD::CondCode Cond, SDValue Chain = SDValue(),
bool IsSignaling = false) {
bool IsSignaling = false, SDNodeFlags Flags = {}) {
assert(LHS.getValueType().isVector() == RHS.getValueType().isVector() &&
"Vector/scalar operand type mismatch for setcc");
assert(LHS.getValueType().isVector() == VT.isVector() &&
Expand All @@ -1322,8 +1322,9 @@ class SelectionDAG {
"Cannot create a setCC of an invalid node.");
if (Chain)
return getNode(IsSignaling ? ISD::STRICT_FSETCCS : ISD::STRICT_FSETCC, DL,
{VT, MVT::Other}, {Chain, LHS, RHS, getCondCode(Cond)});
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond));
{VT, MVT::Other}, {Chain, LHS, RHS, getCondCode(Cond)},
Flags);
return getNode(ISD::SETCC, DL, VT, LHS, RHS, getCondCode(Cond), Flags);
}

/// Helper function to make it easier to build VP_SETCCs if you just have an
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -5356,7 +5356,8 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// comparison may check if the operand is NAN, INF, zero, normal, etc. The
/// result should be used as the condition operand for a select or branch.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
const DenormalMode &Mode) const;
const DenormalMode &Mode,
SDNodeFlags Flags = {}) const;

/// Return a target-dependent result if the input operand is not suitable for
/// use with a square root estimate calculation.
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29832,7 +29832,8 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (!Reciprocal) {
SDLoc DL(Op);
// Try the target-specific test first.
SDValue Test = TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT));
SDValue Test =
TLI.getSqrtInputTest(Op, DAG, DAG.getDenormalMode(VT), Flags);

// The estimate is now completely wrong if the input was exactly 0.0 or
// possibly a denormal. Force the answer to 0.0 or value provided by
Expand Down
15 changes: 9 additions & 6 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7451,7 +7451,8 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
}

SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
const DenormalMode &Mode,
SDNodeFlags Flags) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
Expand All @@ -7461,18 +7462,20 @@ SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
// result.
if (Mode.Input == DenormalMode::PreserveSign ||
Mode.Input == DenormalMode::PositiveZero) {
// Test = X == 0.0
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
// Test = X == 0.0, built with the fast-math flags passed in via Flags.
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
/*IsSignaling=*/false, Flags);
}

// Test for denormal inputs as well, to avoid a wrong estimate.
//
// Test = fabs(X) < SmallestNormal
// Test = fabs(X) < SmallestNormal, built with the fast-math flags passed in
// via Flags.
const fltSemantics &FltSem = VT.getFltSemantics();
APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op, Flags);
return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT, /*Chain=*/{},
/*IsSignaling=*/false, Flags);
}

SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
Expand Down
9 changes: 5 additions & 4 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12701,14 +12701,15 @@ static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
return SDValue();
}

SDValue
AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
SDValue AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode,
SDNodeFlags Flags) const {
SDLoc DL(Op);
EVT VT = Op.getValueType();
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ, /*Chain=*/{},
/*IsSignaling=*/false, Flags);
}

SDValue
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -799,7 +799,8 @@ class AArch64TargetLowering : public TargetLowering {
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &ExtraSteps) const override;
SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
const DenormalMode &Mode) const override;
const DenormalMode &Mode,
SDNodeFlags Flags) const override;
SDValue getSqrtResultForDenormInput(SDValue Operand,
SelectionDAG &DAG) const override;
unsigned combineRepeatedFPDivisors() const override;
Expand Down
7 changes: 4 additions & 3 deletions llvm/lib/Target/PowerPC/PPCISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14650,17 +14650,18 @@ static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
}

SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
const DenormalMode &Mode) const {
const DenormalMode &Mode,
SDNodeFlags Flags) const {
// We only have VSX Vector Test for software Square Root.
EVT VT = Op.getValueType();
if (!isTypeLegal(MVT::i1) ||
(VT != MVT::f64 &&
((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
return TargetLowering::getSqrtInputTest(Op, DAG, Mode, Flags);

SDLoc DL(Op);
// The output register of FTSQRT is a CR field.
SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op, Flags);
// ftsqrt BF,FRB
// Let e_b be the unbiased exponent of the double-precision
// floating-point operand in register FRB.
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/PowerPC/PPCISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -1463,7 +1463,8 @@ namespace llvm {
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
int &RefinementSteps) const override;
SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
const DenormalMode &Mode) const override;
const DenormalMode &Mode,
SDNodeFlags Flags) const override;
SDValue getSqrtResultForDenormInput(SDValue Operand,
SelectionDAG &DAG) const override;
unsigned combineRepeatedFPDivisors() const override;
Expand Down
90 changes: 40 additions & 50 deletions llvm/test/CodeGen/PowerPC/fmf-propagation.ll
Original file line number Diff line number Diff line change
Expand Up @@ -325,24 +325,21 @@ define float @sqrt_afn_ieee(float %x) #0 {
;
; GLOBAL-LABEL: sqrt_afn_ieee:
; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha
; GLOBAL-NEXT: xsabsdp 0, 1
; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3)
; GLOBAL-NEXT: fcmpu 0, 0, 2
; GLOBAL-NEXT: xxlxor 0, 0, 0
; GLOBAL-NEXT: blt 0, .LBB11_2
; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha
; GLOBAL-NEXT: xvcvsxwdp 2, 34
; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
; GLOBAL-NEXT: xvcvsxwdp 3, 34
; GLOBAL-NEXT: xsmulsp 2, 1, 0
; GLOBAL-NEXT: xsabsdp 1, 1
; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI11_0@toc@l(3)
; GLOBAL-NEXT: xsmulsp 0, 1, 0
; GLOBAL-NEXT: xsmulsp 0, 0, 2
; GLOBAL-NEXT: .LBB11_2:
; GLOBAL-NEXT: fmr 1, 0
; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha
; GLOBAL-NEXT: xsmulsp 0, 2, 0
; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3)
; GLOBAL-NEXT: xssubsp 1, 1, 2
; GLOBAL-NEXT: xxlxor 2, 2, 2
; GLOBAL-NEXT: xsmulsp 0, 0, 3
; GLOBAL-NEXT: fsel 1, 1, 0, 2
; GLOBAL-NEXT: blr
%rt = call afn ninf float @llvm.sqrt.f32(float %x)
ret float %rt
Expand Down Expand Up @@ -393,21 +390,19 @@ define float @sqrt_afn_preserve_sign(float %x) #1 {
;
; GLOBAL-LABEL: sqrt_afn_preserve_sign:
; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xxlxor 0, 0, 0
; GLOBAL-NEXT: fcmpu 0, 1, 0
; GLOBAL-NEXT: beq 0, .LBB13_2
; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha
; GLOBAL-NEXT: xvcvsxwdp 2, 34
; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
; GLOBAL-NEXT: xvcvsxwdp 3, 34
; GLOBAL-NEXT: xsmulsp 2, 1, 0
; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI13_0@toc@l(3)
; GLOBAL-NEXT: xsmulsp 0, 1, 0
; GLOBAL-NEXT: xsmulsp 0, 0, 2
; GLOBAL-NEXT: .LBB13_2:
; GLOBAL-NEXT: fmr 1, 0
; GLOBAL-NEXT: xsmulsp 0, 2, 0
; GLOBAL-NEXT: xxlxor 2, 2, 2
; GLOBAL-NEXT: xsmulsp 0, 0, 3
; GLOBAL-NEXT: fsel 2, 1, 2, 0
; GLOBAL-NEXT: xsnegdp 1, 1
; GLOBAL-NEXT: fsel 1, 1, 2, 0
; GLOBAL-NEXT: blr
%rt = call afn ninf float @llvm.sqrt.f32(float %x)
ret float %rt
Expand Down Expand Up @@ -462,24 +457,21 @@ define float @sqrt_fast_ieee(float %x) #0 {
;
; GLOBAL-LABEL: sqrt_fast_ieee:
; GLOBAL: # %bb.0:
; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha
; GLOBAL-NEXT: xsabsdp 0, 1
; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3)
; GLOBAL-NEXT: fcmpu 0, 0, 2
; GLOBAL-NEXT: xxlxor 0, 0, 0
; GLOBAL-NEXT: blt 0, .LBB15_2
; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha
; GLOBAL-NEXT: xvcvsxwdp 2, 34
; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
; GLOBAL-NEXT: xvcvsxwdp 3, 34
; GLOBAL-NEXT: xsmulsp 2, 1, 0
; GLOBAL-NEXT: xsabsdp 1, 1
; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI15_0@toc@l(3)
; GLOBAL-NEXT: xsmulsp 0, 1, 0
; GLOBAL-NEXT: xsmulsp 0, 0, 2
; GLOBAL-NEXT: .LBB15_2:
; GLOBAL-NEXT: fmr 1, 0
; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha
; GLOBAL-NEXT: xsmulsp 0, 2, 0
; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3)
; GLOBAL-NEXT: xssubsp 1, 1, 2
; GLOBAL-NEXT: xxlxor 2, 2, 2
; GLOBAL-NEXT: xsmulsp 0, 0, 3
; GLOBAL-NEXT: fsel 1, 1, 0, 2
; GLOBAL-NEXT: blr
%rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x)
ret float %rt
Expand Down Expand Up @@ -517,21 +509,19 @@ define float @sqrt_fast_preserve_sign(float %x) #1 {
;
; GLOBAL-LABEL: sqrt_fast_preserve_sign:
; GLOBAL: # %bb.0:
; GLOBAL-NEXT: xxlxor 0, 0, 0
; GLOBAL-NEXT: fcmpu 0, 1, 0
; GLOBAL-NEXT: beq 0, .LBB16_2
; GLOBAL-NEXT: # %bb.1:
; GLOBAL-NEXT: xsrsqrtesp 0, 1
; GLOBAL-NEXT: vspltisw 2, -3
; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha
; GLOBAL-NEXT: xvcvsxwdp 2, 34
; GLOBAL-NEXT: xsmulsp 1, 1, 0
; GLOBAL-NEXT: xsmaddasp 2, 1, 0
; GLOBAL-NEXT: xvcvsxwdp 3, 34
; GLOBAL-NEXT: xsmulsp 2, 1, 0
; GLOBAL-NEXT: xsmaddasp 3, 2, 0
; GLOBAL-NEXT: lfs 0, .LCPI16_0@toc@l(3)
; GLOBAL-NEXT: xsmulsp 0, 1, 0
; GLOBAL-NEXT: xsmulsp 0, 0, 2
; GLOBAL-NEXT: .LBB16_2:
; GLOBAL-NEXT: fmr 1, 0
; GLOBAL-NEXT: xsmulsp 0, 2, 0
; GLOBAL-NEXT: xxlxor 2, 2, 2
; GLOBAL-NEXT: xsmulsp 0, 0, 3
; GLOBAL-NEXT: fsel 2, 1, 2, 0
; GLOBAL-NEXT: xsnegdp 1, 1
; GLOBAL-NEXT: fsel 1, 1, 2, 0
; GLOBAL-NEXT: blr
%rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x)
ret float %rt
Expand Down
Loading