Skip to content

Commit 4566628

Browse files
committed
SelectionDAG: Support nofpclass
Currently SelectionDAG ignores the nofpclass information from arguments. For example, given `define dso_local float @f(float noundef nofpclass(nan zero) %a, float noundef nofpclass(nan zero) %b) #0 { entry: %cond = tail call float @llvm.maximumnum.f32(float %a, float %b) ret float %cond }`, SelectionDAG::isKnownNeverNaN returns false for the arguments. TODO: 1) bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) needs to process hasNoSNaN; 2) bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) needs to process Zero and SignedZero. These two problems will be fixed in other PRs.
1 parent e8a7390 commit 4566628

File tree

8 files changed

+142
-59
lines changed

8 files changed

+142
-59
lines changed

llvm/include/llvm/CodeGen/SelectionDAG.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2136,6 +2136,14 @@ class SelectionDAG {
21362136
/// positive or negative zero.
21372137
bool isKnownNeverZeroFloat(SDValue Op) const;
21382138

2139+
/// Test whether the given floating point SDValue is known to never be
2140+
/// positive zero.
2141+
bool isKnownNeverPosZeroFloat(SDValue Op) const;
2142+
2143+
/// Test whether the given floating point SDValue is known to never be
2144+
/// negative zero.
2145+
bool isKnownNeverNegZeroFloat(SDValue Op) const;
2146+
21392147
/// Test whether the given SDValue is known to contain non-zero value(s).
21402148
bool isKnownNeverZero(SDValue Op, unsigned Depth = 0) const;
21412149

llvm/include/llvm/CodeGen/SelectionDAGNodes.h

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -383,6 +383,7 @@ struct SDNodeFlags {
383383
bool Exact : 1;
384384
bool Disjoint : 1;
385385
bool NonNeg : 1;
386+
// Deprecated: use NoQNaNs && NoSNaNs instead.
386387
bool NoNaNs : 1;
387388
bool NoInfs : 1;
388389
bool NoSignedZeros : 1;
@@ -400,19 +401,27 @@ struct SDNodeFlags {
400401
// Instructions with attached 'unpredictable' metadata on IR level.
401402
bool Unpredictable : 1;
402403

404+
bool NoQNaNs : 1;
405+
bool NoSNaNs : 1;
406+
bool NoPosZeros : 1;
407+
bool NoNegZeros : 1;
408+
403409
public:
404410
/// Default constructor turns off all optimization flags.
405411
SDNodeFlags()
406412
: NoUnsignedWrap(false), NoSignedWrap(false), Exact(false),
407413
Disjoint(false), NonNeg(false), NoNaNs(false), NoInfs(false),
408414
NoSignedZeros(false), AllowReciprocal(false), AllowContract(false),
409415
ApproximateFuncs(false), AllowReassociation(false), NoFPExcept(false),
410-
Unpredictable(false) {}
416+
Unpredictable(false), NoQNaNs(false), NoSNaNs(false), NoPosZeros(false),
417+
NoNegZeros(false) {}
411418

412419
/// Propagate the fast-math-flags from an IR FPMathOperator.
413420
void copyFMF(const FPMathOperator &FPMO) {
414-
setNoNaNs(FPMO.hasNoNaNs());
421+
setNoSNaNs(FPMO.hasNoNaNs());
422+
setNoQNaNs(FPMO.hasNoNaNs());
415423
setNoInfs(FPMO.hasNoInfs());
424+
setNoNegZeros(FPMO.hasNoSignedZeros());
416425
setNoSignedZeros(FPMO.hasNoSignedZeros());
417426
setAllowReciprocal(FPMO.hasAllowReciprocal());
418427
setAllowContract(FPMO.hasAllowContract());
@@ -426,8 +435,20 @@ struct SDNodeFlags {
426435
void setExact(bool b) { Exact = b; }
427436
void setDisjoint(bool b) { Disjoint = b; }
428437
void setNonNeg(bool b) { NonNeg = b; }
429-
void setNoNaNs(bool b) { NoNaNs = b; }
438+
[[deprecated("Use SetSNaNs() and SetQNaNs()")]] void setNoNaNs(bool b) {
439+
NoNaNs = NoQNaNs = NoSNaNs = b;
440+
}
441+
void setNoQNaNs(bool b) {
442+
NoQNaNs = b;
443+
NoNaNs = (NoQNaNs && NoSNaNs);
444+
}
445+
void setNoSNaNs(bool b) {
446+
NoSNaNs = b;
447+
NoNaNs = (NoQNaNs && NoSNaNs);
448+
}
430449
void setNoInfs(bool b) { NoInfs = b; }
450+
void setNoPosZeros(bool b) { NoPosZeros = b; }
451+
void setNoNegZeros(bool b) { NoNegZeros = b; }
431452
void setNoSignedZeros(bool b) { NoSignedZeros = b; }
432453
void setAllowReciprocal(bool b) { AllowReciprocal = b; }
433454
void setAllowContract(bool b) { AllowContract = b; }
@@ -442,8 +463,12 @@ struct SDNodeFlags {
442463
bool hasExact() const { return Exact; }
443464
bool hasDisjoint() const { return Disjoint; }
444465
bool hasNonNeg() const { return NonNeg; }
445-
bool hasNoNaNs() const { return NoNaNs; }
466+
bool hasNoNaNs() const { return (NoSNaNs && NoQNaNs); }
467+
bool hasNoSNaNs() const { return NoSNaNs; }
468+
bool hasNoQNaNs() const { return NoQNaNs; }
446469
bool hasNoInfs() const { return NoInfs; }
470+
bool hasNoPosZeros() const { return NoPosZeros; }
471+
bool hasNoNegZeros() const { return NoNegZeros; }
447472
bool hasNoSignedZeros() const { return NoSignedZeros; }
448473
bool hasAllowReciprocal() const { return AllowReciprocal; }
449474
bool hasAllowContract() const { return AllowContract; }

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5435,7 +5435,12 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
54355435

54365436
bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const {
54375437
// If we're told that NaNs won't happen, assume they won't.
5438-
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
5438+
if (getTarget().Options.NoNaNsFPMath)
5439+
return true;
5440+
SDNodeFlags OpFlags = Op->getFlags();
5441+
if (SNaN && OpFlags.hasNoSNaNs())
5442+
return true;
5443+
if (OpFlags.hasNoSNaNs() && OpFlags.hasNoQNaNs())
54395444
return true;
54405445

54415446
if (Depth >= MaxRecursionDepth)
@@ -5569,11 +5574,39 @@ bool SelectionDAG::isKnownNeverZeroFloat(SDValue Op) const {
55695574
assert(Op.getValueType().isFloatingPoint() &&
55705575
"Floating point type expected");
55715576

5577+
SDNodeFlags OpFlags = Op->getFlags();
5578+
if (OpFlags.hasNoPosZeros() && OpFlags.hasNoNegZeros())
5579+
return true;
5580+
55725581
// If the value is a constant, we can obviously see if it is a zero or not.
55735582
return ISD::matchUnaryFpPredicate(
55745583
Op, [](ConstantFPSDNode *C) { return !C->isZero(); });
55755584
}
55765585

5586+
bool SelectionDAG::isKnownNeverPosZeroFloat(SDValue Op) const {
5587+
assert(Op.getValueType().isFloatingPoint() && "Floating point type expected");
5588+
5589+
SDNodeFlags OpFlags = Op->getFlags();
5590+
if (OpFlags.hasNoPosZeros())
5591+
return true;
5592+
5593+
// If the value is a constant, we can obviously see if it is a zero or not.
5594+
return ISD::matchUnaryFpPredicate(
5595+
Op, [](ConstantFPSDNode *C) { return !C->isZero() || C->isNegative(); });
5596+
}
5597+
5598+
bool SelectionDAG::isKnownNeverNegZeroFloat(SDValue Op) const {
5599+
assert(Op.getValueType().isFloatingPoint() && "Floating point type expected");
5600+
5601+
SDNodeFlags OpFlags = Op->getFlags();
5602+
if (OpFlags.hasNoNegZeros())
5603+
return true;
5604+
5605+
// If the value is a constant, we can obviously see if it is a zero or not.
5606+
return ISD::matchUnaryFpPredicate(
5607+
Op, [](ConstantFPSDNode *C) { return !C->isZero() || !C->isNegative(); });
5608+
}
5609+
55775610
bool SelectionDAG::isKnownNeverZero(SDValue Op, unsigned Depth) const {
55785611
if (Depth >= MaxRecursionDepth)
55795612
return false; // Limit search depth.
@@ -7490,6 +7523,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
74907523
N2.getOpcode() != ISD::DELETED_NODE &&
74917524
N3.getOpcode() != ISD::DELETED_NODE &&
74927525
"Operand is DELETED_NODE!");
7526+
SDNodeFlags NewFlags = Flags;
74937527
// Perform various simplifications.
74947528
switch (Opcode) {
74957529
case ISD::FMA:
@@ -7535,6 +7569,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
75357569
assert((!VT.isVector() || VT.getVectorElementCount() ==
75367570
N1.getValueType().getVectorElementCount()) &&
75377571
"SETCC vector element counts must match!");
7572+
if (N1->getFlags().hasNoNaNs() && N2->getFlags().hasNoNaNs()) {
7573+
NewFlags.setNoQNaNs(true);
7574+
NewFlags.setNoSNaNs(true);
7575+
}
75387576
// Use FoldSetCC to simplify SETCC's.
75397577
if (SDValue V = FoldSetCC(VT, N1, N2, cast<CondCodeSDNode>(N3)->get(), DL))
75407578
return V;
@@ -7548,6 +7586,11 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
75487586
}
75497587
case ISD::SELECT:
75507588
case ISD::VSELECT:
7589+
if ((N1->getFlags().hasNoNaNs() && N2->getFlags().hasNoNaNs()) ||
7590+
N3->getFlags().hasNoNaNs()) {
7591+
NewFlags.setNoQNaNs(true);
7592+
NewFlags.setNoSNaNs(true);
7593+
}
75517594
if (SDValue V = simplifySelect(N1, N2, N3))
75527595
return V;
75537596
break;
@@ -7654,12 +7697,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
76547697
AddNodeIDNode(ID, Opcode, VTs, Ops);
76557698
void *IP = nullptr;
76567699
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
7657-
E->intersectFlagsWith(Flags);
7700+
E->intersectFlagsWith(NewFlags);
76587701
return SDValue(E, 0);
76597702
}
76607703

76617704
N = newSDNode<SDNode>(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs);
7662-
N->setFlags(Flags);
7705+
N->setFlags(NewFlags);
76637706
createOperands(N, Ops);
76647707
CSEMap.InsertNode(N, IP);
76657708
} else {

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3708,8 +3708,24 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
37083708
bool Negate = false;
37093709

37103710
SDNodeFlags Flags;
3711-
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
3711+
SelectInst *NewI = dyn_cast<SelectInst>(cast<SelectInst>(I).clone());
3712+
if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
37123713
Flags.copyFMF(*FPOp);
3714+
if (Cond->getFlags().hasNoNaNs() ||
3715+
(LHSVal->getFlags().hasNoNaNs() && RHSVal->getFlags().hasNoNaNs())) {
3716+
FastMathFlags FMF = FPOp->getFastMathFlags();
3717+
FMF.setNoNaNs(true);
3718+
NewI->setFastMathFlags(FMF);
3719+
CmpInst *CmpCond = dyn_cast<CmpInst>(NewI->getCondition());
3720+
if (isa<FPMathOperator>(CmpCond)) {
3721+
FastMathFlags CondFMF = CmpCond->getFastMathFlags();
3722+
CondFMF.setNoNaNs(true);
3723+
CmpCond->setFastMathFlags(CondFMF);
3724+
}
3725+
Flags.setNoQNaNs(true);
3726+
Flags.setNoSNaNs(true);
3727+
}
3728+
}
37133729

37143730
Flags.setUnpredictable(
37153731
cast<SelectInst>(I).getMetadata(LLVMContext::MD_unpredictable));
@@ -3735,7 +3751,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
37353751
// so we can't lower to FMINIMUM/FMAXIMUM because those nodes specify that
37363752
// -0.0 is less than +0.0.
37373753
const Value *LHS, *RHS;
3738-
auto SPR = matchSelectPattern(&I, LHS, RHS);
3754+
auto SPR = matchSelectPattern(NewI, LHS, RHS);
37393755
ISD::NodeType Opc = ISD::DELETED_NODE;
37403756
switch (SPR.Flavor) {
37413757
case SPF_UMAX: Opc = ISD::UMAX; break;
@@ -3798,6 +3814,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
37983814
BaseOps.clear();
37993815
}
38003816
}
3817+
NewI->deleteValue();
38013818

38023819
if (IsUnaryAbs) {
38033820
for (unsigned i = 0; i != NumValues; ++i) {
@@ -11775,6 +11792,22 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
1177511792
AssertOp = ISD::AssertSext;
1177611793
else if (Arg.hasAttribute(Attribute::ZExt))
1177711794
AssertOp = ISD::AssertZext;
11795+
if (Arg.hasAttribute(Attribute::NoFPClass)) {
11796+
SDNodeFlags InValFlags = InVals[i]->getFlags();
11797+
bool NoSNaN = ((Arg.getNoFPClass() & llvm::fcSNan) == llvm::fcSNan);
11798+
bool NoQNaN = ((Arg.getNoFPClass() & llvm::fcQNan) == llvm::fcQNan);
11799+
InValFlags.setNoSNaNs(NoSNaN);
11800+
InValFlags.setNoQNaNs(NoQNaN);
11801+
bool NoPosZeros =
11802+
((Arg.getNoFPClass() & llvm::fcPosZero) == llvm::fcPosZero);
11803+
bool NoNegZeros =
11804+
((Arg.getNoFPClass() & llvm::fcNegZero) == llvm::fcNegZero);
11805+
InValFlags.setNoPosZeros(NoPosZeros);
11806+
InValFlags.setNoNegZeros(NoNegZeros);
11807+
InValFlags.setNoInfs((Arg.getNoFPClass() & llvm::fcInf) ==
11808+
llvm::fcInf);
11809+
InVals[i]->setFlags(InValFlags);
11810+
}
1177811811

1177911812
ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], NumParts,
1178011813
PartVT, VT, nullptr, NewRoot,

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8422,6 +8422,10 @@ TargetLowering::createSelectForFMINNUM_FMAXNUM(SDNode *Node,
84228422
Opcode == ISD::STRICT_FMINNUM || Opcode == ISD::STRICT_FMAXNUM) &&
84238423
"Wrong opcode");
84248424

8425+
EVT VT = Node->getValueType(0);
8426+
if (VT.isVector() && isOperationLegal(Opcode, VT.getScalarType()))
8427+
return SDValue();
8428+
84258429
if (Node->getFlags().hasNoNaNs()) {
84268430
ISD::CondCode Pred = Opcode == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
84278431
SDValue Op1 = Node->getOperand(0);

llvm/test/CodeGen/AMDGPU/known-never-snan.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,6 @@ define float @v_select_possible_nan_lhs_input_fmed3_r_i_i_f32(float %a, float %b
248248
; GCN-NEXT: v_add_f32_e32 v1, 1.0, v1
249249
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v2
250250
; GCN-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc
251-
; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0
252251
; GCN-NEXT: v_med3_f32 v0, v0, 2.0, 4.0
253252
; GCN-NEXT: s_setpc_b64 s[30:31]
254253
%b.nnan.add = fadd nnan float %b, 1.0

llvm/test/CodeGen/AMDGPU/reduction.ll

Lines changed: 7 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -498,18 +498,11 @@ entry:
498498
; XVI-NEXT: s_setpc_b64
499499

500500
; GFX9: s_waitcnt
501-
; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
502-
; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
503-
; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
501+
; GFX9-NEXT: v_pk_max_f16 [[MAX:v[0-9]+]], v0, v1{{$}}
504502
; GFX9-NEXT: v_max_f16_sdwa v{{[0-9]+}}, [[MAX]], [[MAX]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
505503

506-
; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
507-
; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
508-
; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
509-
; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1
510-
511-
; VI-DAG: v_max_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
512-
; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
504+
; VI-DAG: v_max_f16_sdwa [[MAX0:v[0-9]+]], v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
505+
; VI-DAG: v_max_f16_e32 [[MAX1:v[0-9]+]], v0, v1
513506
; VI: v_max_f16_e32 v0, [[MAX1]], [[MAX0]]
514507
define half @reduction_fast_max_pattern_v4f16(<4 x half> %vec4) {
515508
entry:
@@ -537,19 +530,12 @@ entry:
537530
; XVI-NEXT: s_setpc_b64
538531

539532
; GFX9: s_waitcnt
540-
; GFX9-NEXT: v_pk_max_f16 [[CANON1:v[0-9]+]], v1, v1
541-
; GFX9-NEXT: v_pk_max_f16 [[CANON0:v[0-9]+]], v0, v0
542-
; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], [[CANON0]], [[CANON1]]{{$}}
533+
; GFX9-NEXT: v_pk_min_f16 [[MIN:v[0-9]+]], v0, v1{{$}}
543534
; GFX9-NEXT: v_min_f16_sdwa v{{[0-9]+}}, [[MIN]], [[MIN]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
544535

545-
; VI-DAG: v_max_f16_sdwa [[CANON1:v[0-9]+]], v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
546-
; VI-DAG: v_max_f16_sdwa [[CANON3:v[0-9]+]], v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
547-
; VI-DAG: v_max_f16_e32 [[CANON0:v[0-9]+]], v0, v0
548-
; VI-DAG: v_max_f16_e32 [[CANON2:v[0-9]+]], v1, v1
549-
550-
; VI-DAG: v_min_f16_e32 [[MAX0:v[0-9]+]], [[CANON1]], [[CANON3]]
551-
; VI-DAG: v_min_f16_e32 [[MAX1:v[0-9]+]], [[CANON0]], [[CANON2]]
552-
; VI: v_min_f16_e32 v0, [[MAX1]], [[MAX0]]
536+
; VI-DAG: v_min_f16_sdwa [[MIN0:v[0-9]+]], v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
537+
; VI-DAG: v_min_f16_e32 [[MIN1:v[0-9]+]], v0, v1
538+
; VI: v_min_f16_e32 v0, [[MIN1]], [[MIN0]]
553539
define half @reduction_fast_min_pattern_v4f16(<4 x half> %vec4) {
554540
entry:
555541
%rdx.shuf = shufflevector <4 x half> %vec4, <4 x half> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>

llvm/test/CodeGen/X86/fminimum-fmaximum.ll

Lines changed: 13 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -411,23 +411,16 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
411411
; SSE2-NEXT: divss %xmm0, %xmm1
412412
; SSE2-NEXT: movd %xmm0, %eax
413413
; SSE2-NEXT: testl %eax, %eax
414-
; SSE2-NEXT: movaps %xmm0, %xmm3
415-
; SSE2-NEXT: js .LBB9_2
416-
; SSE2-NEXT: # %bb.1:
417-
; SSE2-NEXT: movaps %xmm1, %xmm3
418-
; SSE2-NEXT: .LBB9_2:
419-
; SSE2-NEXT: movaps %xmm3, %xmm2
420-
; SSE2-NEXT: cmpunordss %xmm3, %xmm2
421-
; SSE2-NEXT: movaps %xmm2, %xmm4
422-
; SSE2-NEXT: andps %xmm3, %xmm4
423-
; SSE2-NEXT: js .LBB9_4
424-
; SSE2-NEXT: # %bb.3:
414+
; SSE2-NEXT: js .LBB9_1
415+
; SSE2-NEXT: # %bb.2:
416+
; SSE2-NEXT: movaps %xmm0, %xmm2
417+
; SSE2-NEXT: jmp .LBB9_3
418+
; SSE2-NEXT: .LBB9_1:
419+
; SSE2-NEXT: movaps %xmm1, %xmm2
425420
; SSE2-NEXT: movaps %xmm0, %xmm1
426-
; SSE2-NEXT: .LBB9_4:
427-
; SSE2-NEXT: maxss %xmm1, %xmm3
428-
; SSE2-NEXT: andnps %xmm3, %xmm2
429-
; SSE2-NEXT: orps %xmm4, %xmm2
430-
; SSE2-NEXT: movaps %xmm2, %xmm0
421+
; SSE2-NEXT: .LBB9_3:
422+
; SSE2-NEXT: maxss %xmm2, %xmm1
423+
; SSE2-NEXT: movaps %xmm1, %xmm0
431424
; SSE2-NEXT: retq
432425
;
433426
; AVX1-LABEL: test_fmaximum_combine_cmps:
@@ -437,15 +430,11 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
437430
; AVX1-NEXT: testl %eax, %eax
438431
; AVX1-NEXT: js .LBB9_1
439432
; AVX1-NEXT: # %bb.2:
440-
; AVX1-NEXT: vmovaps %xmm0, %xmm2
441-
; AVX1-NEXT: jmp .LBB9_3
433+
; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm0
434+
; AVX1-NEXT: retq
442435
; AVX1-NEXT: .LBB9_1:
443436
; AVX1-NEXT: vmovaps %xmm1, %xmm2
444-
; AVX1-NEXT: vmovaps %xmm0, %xmm1
445-
; AVX1-NEXT: .LBB9_3:
446-
; AVX1-NEXT: vmaxss %xmm2, %xmm1, %xmm0
447-
; AVX1-NEXT: vcmpunordss %xmm1, %xmm1, %xmm2
448-
; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0
437+
; AVX1-NEXT: vmaxss %xmm2, %xmm0, %xmm0
449438
; AVX1-NEXT: retq
450439
;
451440
; AVX512F-LABEL: test_fmaximum_combine_cmps:
@@ -459,8 +448,6 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
459448
; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
460449
; AVX512F-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
461450
; AVX512F-NEXT: vmaxss %xmm2, %xmm1, %xmm0
462-
; AVX512F-NEXT: vcmpunordss %xmm1, %xmm1, %k1
463-
; AVX512F-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
464451
; AVX512F-NEXT: retq
465452
;
466453
; AVX512DQ-LABEL: test_fmaximum_combine_cmps:
@@ -490,9 +477,7 @@ define float @test_fmaximum_combine_cmps(float %x, float %y) nounwind {
490477
; X86-NEXT: vmovaps %xmm0, %xmm2
491478
; X86-NEXT: vmovaps %xmm1, %xmm0
492479
; X86-NEXT: .LBB9_3:
493-
; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm1
494-
; X86-NEXT: vcmpunordss %xmm0, %xmm0, %xmm2
495-
; X86-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0
480+
; X86-NEXT: vmaxss %xmm2, %xmm0, %xmm0
496481
; X86-NEXT: vmovss %xmm0, (%esp)
497482
; X86-NEXT: flds (%esp)
498483
; X86-NEXT: popl %eax

0 commit comments

Comments
 (0)