Skip to content

Commit bbf8b24

Browse files
committed
Avoid maxnum(sNaN, x) optimizations / folds
The behaviour of constant-folding maxnum(sNaN, x) and minnum(sNaN, x) has become controversial, and there are ongoing discussions about which behaviour we want to specify in the LLVM IR LangRef. See: - #170082 - #168838 - #138451 - #170067 - https://discourse.llvm.org/t/rfc-a-consistent-set-of-semantics-for-the-floating-point-minimum-and-maximum-operations/89006 This patch removes optimizations and constant-folding support for maxnum(sNaN, x) and minnum(sNaN, x) but keeps them folded/optimized for qNaNs. This should allow for some more flexibility so the implementation can conform to either the old or new version of the semantics specified without any changes. As far as I am aware, optimizations involving constant sNaN should generally be edge-cases that rarely occur, so there should hopefully be very little real-world performance impact from disabling these optimizations.
1 parent 2c21790 commit bbf8b24

File tree

11 files changed

+142
-59
lines changed

11 files changed

+142
-59
lines changed

llvm/lib/Analysis/ConstantFolding.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3348,8 +3348,12 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
33483348
case Intrinsic::copysign:
33493349
return ConstantFP::get(Ty->getContext(), APFloat::copySign(Op1V, Op2V));
33503350
case Intrinsic::minnum:
3351+
if (Op1V.isSignaling() || Op2V.isSignaling())
3352+
return nullptr;
33513353
return ConstantFP::get(Ty->getContext(), minnum(Op1V, Op2V));
33523354
case Intrinsic::maxnum:
3355+
if (Op1V.isSignaling() || Op2V.isSignaling())
3356+
return nullptr;
33533357
return ConstantFP::get(Ty->getContext(), maxnum(Op1V, Op2V));
33543358
case Intrinsic::minimum:
33553359
return ConstantFP::get(Ty->getContext(), minimum(Op1V, Op2V));

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6620,7 +6620,8 @@ static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst,
66206620
assert(OutNewConstVal != nullptr);
66216621

66226622
bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
6623-
bool PropagateSNaN = IID == Intrinsic::minnum || IID == Intrinsic::maxnum;
6623+
bool ReturnsOtherForAllNaNs =
6624+
IID == Intrinsic::minimumnum || IID == Intrinsic::maximumnum;
66246625
bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
66256626
IID == Intrinsic::minimumnum;
66266627

@@ -6637,29 +6638,27 @@ static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst,
66376638

66386639
// minnum(x, qnan) -> x
66396640
// maxnum(x, qnan) -> x
6640-
// minnum(x, snan) -> qnan
6641-
// maxnum(x, snan) -> qnan
66426641
// minimum(X, nan) -> qnan
66436642
// maximum(X, nan) -> qnan
66446643
// minimumnum(X, nan) -> x
66456644
// maximumnum(X, nan) -> x
66466645
if (CAPF.isNaN()) {
6647-
if (PropagateNaN || (PropagateSNaN && CAPF.isSignaling())) {
6646+
if (PropagateNaN) {
66486647
*OutNewConstVal = ConstantFP::get(CFP->getType(), CAPF.makeQuiet());
66496648
return MinMaxOptResult::UseNewConstVal;
6649+
} else if (ReturnsOtherForAllNaNs || !CAPF.isSignaling()) {
6650+
return MinMaxOptResult::UseOtherVal;
66506651
}
6651-
return MinMaxOptResult::UseOtherVal;
6652+
return MinMaxOptResult::CannotOptimize;
66526653
}
66536654

66546655
if (CAPF.isInfinity() || (Call && Call->hasNoInfs() && CAPF.isLargest())) {
6655-
// minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
6656-
// maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
66576656
// minimum(X, -inf) -> -inf if nnan
66586657
// maximum(X, +inf) -> +inf if nnan
66596658
// minimumnum(X, -inf) -> -inf
66606659
// maximumnum(X, +inf) -> +inf
66616660
if (CAPF.isNegative() == IsMin &&
6662-
(!PropagateNaN || (Call && Call->hasNoNaNs()))) {
6661+
(ReturnsOtherForAllNaNs || (Call && Call->hasNoNaNs()))) {
66636662
*OutNewConstVal = const_cast<Constant *>(RHSConst);
66646663
return MinMaxOptResult::UseNewConstVal;
66656664
}
@@ -7004,12 +7003,10 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
70047003
case Intrinsic::minimum:
70057004
case Intrinsic::maximumnum:
70067005
case Intrinsic::minimumnum: {
7007-
// In several cases here, we deviate from exact IEEE 754 semantics
7008-
// to enable optimizations (as allowed by the LLVM IR spec).
7009-
//
7010-
// For instance, we may return one of the arguments unmodified instead of
7011-
// inserting an llvm.canonicalize to transform input sNaNs into qNaNs,
7012-
// or may assume all NaN inputs are qNaNs.
7006+
// In some cases here, we deviate from exact IEEE-754 semantics to enable
7007+
// optimizations (as allowed by the LLVM IR spec) by returning one of the
7008+
// arguments unmodified instead of inserting an llvm.canonicalize to
7009+
// transform input sNaNs into qNaNs.
70137010

70147011
// If the arguments are the same, this is a no-op (ignoring NaN quieting)
70157012
if (Op0 == Op1)

llvm/lib/CodeGen/GlobalISel/Utils.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -768,8 +768,12 @@ llvm::ConstantFoldFPBinOp(unsigned Opcode, const Register Op1,
768768
C1.copySign(C2);
769769
return C1;
770770
case TargetOpcode::G_FMINNUM:
771+
if (C1.isSignaling() || C2.isSignaling())
772+
return std::nullopt;
771773
return minnum(C1, C2);
772774
case TargetOpcode::G_FMAXNUM:
775+
if (C1.isSignaling() || C2.isSignaling())
776+
return std::nullopt;
773777
return maxnum(C1, C2);
774778
case TargetOpcode::G_FMINIMUM:
775779
return minimum(C1, C2);

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19505,7 +19505,8 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) {
1950519505
const SDNodeFlags Flags = N->getFlags();
1950619506
unsigned Opc = N->getOpcode();
1950719507
bool PropAllNaNsToQNaNs = Opc == ISD::FMINIMUM || Opc == ISD::FMAXIMUM;
19508-
bool PropOnlySNaNsToQNaNs = Opc == ISD::FMINNUM || Opc == ISD::FMAXNUM;
19508+
bool ReturnsOtherForAllNaNs =
19509+
Opc == ISD::FMINIMUMNUM || Opc == ISD::FMAXIMUMNUM;
1950919510
bool IsMin =
1951019511
Opc == ISD::FMINNUM || Opc == ISD::FMINIMUM || Opc == ISD::FMINIMUMNUM;
1951119512
SelectionDAG::FlagInserter FlagsInserter(DAG, N);
@@ -19524,32 +19525,30 @@ SDValue DAGCombiner::visitFMinMax(SDNode *N) {
1952419525

1952519526
// minnum(X, qnan) -> X
1952619527
// maxnum(X, qnan) -> X
19527-
// minnum(X, snan) -> qnan
19528-
// maxnum(X, snan) -> qnan
1952919528
// minimum(X, nan) -> qnan
1953019529
// maximum(X, nan) -> qnan
1953119530
// minimumnum(X, nan) -> X
1953219531
// maximumnum(X, nan) -> X
1953319532
if (AF.isNaN()) {
19534-
if (PropAllNaNsToQNaNs || (AF.isSignaling() && PropOnlySNaNsToQNaNs)) {
19533+
if (PropAllNaNsToQNaNs) {
1953519534
if (AF.isSignaling())
1953619535
return DAG.getConstantFP(AF.makeQuiet(), SDLoc(N), VT);
1953719536
return N->getOperand(1);
19537+
} else if (ReturnsOtherForAllNaNs || !AF.isSignaling()) {
19538+
return N->getOperand(0);
1953819539
}
19539-
return N->getOperand(0);
19540+
return SDValue();
1954019541
}
1954119542

1954219543
// In the following folds, inf can be replaced with the largest finite
1954319544
// float, if the ninf flag is set.
1954419545
if (AF.isInfinity() || (Flags.hasNoInfs() && AF.isLargest())) {
19545-
// minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
19546-
// maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
1954719546
// minimum(X, -inf) -> -inf if nnan
1954819547
// maximum(X, +inf) -> +inf if nnan
1954919548
// minimumnum(X, -inf) -> -inf
1955019549
// maximumnum(X, +inf) -> +inf
1955119550
if (IsMin == AF.isNegative() &&
19552-
(!PropAllNaNsToQNaNs || Flags.hasNoNaNs()))
19551+
(ReturnsOtherForAllNaNs || Flags.hasNoNaNs()))
1955319552
return N->getOperand(1);
1955419553

1955519554
// minnum(X, +inf) -> X if nnan

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7382,8 +7382,12 @@ SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
73827382
C1.copySign(C2);
73837383
return getConstantFP(C1, DL, VT);
73847384
case ISD::FMINNUM:
7385+
if (C1.isSignaling() || C2.isSignaling())
7386+
return SDValue();
73857387
return getConstantFP(minnum(C1, C2), DL, VT);
73867388
case ISD::FMAXNUM:
7389+
if (C1.isSignaling() || C2.isSignaling())
7390+
return SDValue();
73877391
return getConstantFP(maxnum(C1, C2), DL, VT);
73887392
case ISD::FMINIMUM:
73897393
return getConstantFP(minimum(C1, C2), DL, VT);

llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -497,10 +497,12 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace
497497
ret void
498498
}
499499

500-
; FIXME: Should there be more checks here? minnum with sNaN operand is simplified to qNaN.
500+
; FIXME: Should there be more checks here? minnum with sNaN operand might get simplified away.
501501

502502
; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
503-
; GCN: v_mov_b32_e32 v{{.+}}, 0x7fc00000
503+
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
504+
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
505+
; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
504506
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) {
505507
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
506508
%gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id

llvm/test/CodeGen/X86/fmaxnum.ll

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -676,15 +676,44 @@ define float @test_maxnum_neg_inf_nnan(float %x, float %y) nounwind {
676676

677677
; Test SNaN quieting
678678
define float @test_maxnum_snan(float %x) {
679-
; SSE-LABEL: test_maxnum_snan:
680-
; SSE: # %bb.0:
681-
; SSE-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
682-
; SSE-NEXT: retq
679+
; SSE2-LABEL: test_maxnum_snan:
680+
; SSE2: # %bb.0:
681+
; SSE2-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
682+
; SSE2-NEXT: movaps %xmm0, %xmm1
683+
; SSE2-NEXT: cmpunordss %xmm0, %xmm1
684+
; SSE2-NEXT: movaps %xmm1, %xmm3
685+
; SSE2-NEXT: andps %xmm2, %xmm3
686+
; SSE2-NEXT: maxss %xmm0, %xmm2
687+
; SSE2-NEXT: andnps %xmm2, %xmm1
688+
; SSE2-NEXT: orps %xmm3, %xmm1
689+
; SSE2-NEXT: movaps %xmm1, %xmm0
690+
; SSE2-NEXT: retq
683691
;
684-
; AVX-LABEL: test_maxnum_snan:
685-
; AVX: # %bb.0:
686-
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
687-
; AVX-NEXT: retq
692+
; SSE4-LABEL: test_maxnum_snan:
693+
; SSE4: # %bb.0:
694+
; SSE4-NEXT: movss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
695+
; SSE4-NEXT: maxss %xmm0, %xmm1
696+
; SSE4-NEXT: cmpunordss %xmm0, %xmm0
697+
; SSE4-NEXT: blendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
698+
; SSE4-NEXT: movaps %xmm1, %xmm0
699+
; SSE4-NEXT: retq
700+
;
701+
; AVX1-LABEL: test_maxnum_snan:
702+
; AVX1: # %bb.0:
703+
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
704+
; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm1
705+
; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
706+
; AVX1-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
707+
; AVX1-NEXT: retq
708+
;
709+
; AVX512-LABEL: test_maxnum_snan:
710+
; AVX512: # %bb.0:
711+
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
712+
; AVX512-NEXT: vmaxss %xmm0, %xmm2, %xmm1
713+
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
714+
; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
715+
; AVX512-NEXT: vmovaps %xmm1, %xmm0
716+
; AVX512-NEXT: retq
688717
%r = call float @llvm.maxnum.f32(float 0x7ff4000000000000, float %x)
689718
ret float %r
690719
}

llvm/test/CodeGen/X86/fminnum.ll

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -676,15 +676,44 @@ define float @test_minnum_inf_nnan(float %x, float %y) nounwind {
676676

677677
; Test SNaN quieting
678678
define float @test_minnum_snan(float %x) {
679-
; SSE-LABEL: test_minnum_snan:
680-
; SSE: # %bb.0:
681-
; SSE-NEXT: movss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
682-
; SSE-NEXT: retq
679+
; SSE2-LABEL: test_minnum_snan:
680+
; SSE2: # %bb.0:
681+
; SSE2-NEXT: movss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
682+
; SSE2-NEXT: movaps %xmm0, %xmm1
683+
; SSE2-NEXT: cmpunordss %xmm0, %xmm1
684+
; SSE2-NEXT: movaps %xmm1, %xmm3
685+
; SSE2-NEXT: andps %xmm2, %xmm3
686+
; SSE2-NEXT: minss %xmm0, %xmm2
687+
; SSE2-NEXT: andnps %xmm2, %xmm1
688+
; SSE2-NEXT: orps %xmm3, %xmm1
689+
; SSE2-NEXT: movaps %xmm1, %xmm0
690+
; SSE2-NEXT: retq
683691
;
684-
; AVX-LABEL: test_minnum_snan:
685-
; AVX: # %bb.0:
686-
; AVX-NEXT: vmovss {{.*#+}} xmm0 = [NaN,0.0E+0,0.0E+0,0.0E+0]
687-
; AVX-NEXT: retq
692+
; SSE4-LABEL: test_minnum_snan:
693+
; SSE4: # %bb.0:
694+
; SSE4-NEXT: movss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
695+
; SSE4-NEXT: minss %xmm0, %xmm1
696+
; SSE4-NEXT: cmpunordss %xmm0, %xmm0
697+
; SSE4-NEXT: blendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
698+
; SSE4-NEXT: movaps %xmm1, %xmm0
699+
; SSE4-NEXT: retq
700+
;
701+
; AVX1-LABEL: test_minnum_snan:
702+
; AVX1: # %bb.0:
703+
; AVX1-NEXT: vmovss {{.*#+}} xmm1 = [NaN,0.0E+0,0.0E+0,0.0E+0]
704+
; AVX1-NEXT: vminss %xmm0, %xmm1, %xmm1
705+
; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
706+
; AVX1-NEXT: vblendvps %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
707+
; AVX1-NEXT: retq
708+
;
709+
; AVX512-LABEL: test_minnum_snan:
710+
; AVX512: # %bb.0:
711+
; AVX512-NEXT: vmovss {{.*#+}} xmm2 = [NaN,0.0E+0,0.0E+0,0.0E+0]
712+
; AVX512-NEXT: vminss %xmm0, %xmm2, %xmm1
713+
; AVX512-NEXT: vcmpunordss %xmm0, %xmm0, %k1
714+
; AVX512-NEXT: vmovss %xmm2, %xmm1, %xmm1 {%k1}
715+
; AVX512-NEXT: vmovaps %xmm1, %xmm0
716+
; AVX512-NEXT: retq
688717
%r = call float @llvm.minnum.f32(float 0x7ff4000000000000, float %x)
689718
ret float %r
690719
}

llvm/test/Transforms/InstCombine/simplify-demanded-fpclass.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ declare float @llvm.trunc.f32(float)
1010
declare float @llvm.arithmetic.fence.f32(float)
1111
declare float @llvm.minnum.f32(float, float)
1212
declare float @llvm.maxnum.f32(float, float)
13+
declare float @llvm.minimumnum.f32(float, float)
14+
declare float @llvm.maximumnum.f32(float, float)
1315

1416

1517
define float @ninf_user_select_inf(i1 %cond, float %x, float %y) {
@@ -1314,7 +1316,7 @@ define nofpclass(pinf) float @ret_nofpclass_pinf__minnum_ninf(i1 %cond, float %x
13141316
; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) {
13151317
; CHECK-NEXT: ret float 0xFFF0000000000000
13161318
;
1317-
%min = call float @llvm.minnum.f32(float %x, float 0xFFF0000000000000)
1319+
%min = call float @llvm.minimumnum.f32(float %x, float 0xFFF0000000000000)
13181320
ret float %min
13191321
}
13201322

@@ -1335,6 +1337,6 @@ define nofpclass(ninf) float @ret_nofpclass_ninf__maxnum_pinf(i1 %cond, float %x
13351337
; CHECK-SAME: (i1 [[COND:%.*]], float [[X:%.*]]) {
13361338
; CHECK-NEXT: ret float 0x7FF0000000000000
13371339
;
1338-
%max = call float @llvm.maxnum.f32(float %x, float 0x7FF0000000000000)
1340+
%max = call float @llvm.maximumnum.f32(float %x, float 0x7FF0000000000000)
13391341
ret float %max
13401342
}

llvm/test/Transforms/InstSimplify/ConstProp/min-max.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -97,15 +97,17 @@ define float @minnum_float_qnan_p0() {
9797

9898
define float @minnum_float_p0_snan() {
9999
; CHECK-LABEL: @minnum_float_p0_snan(
100-
; CHECK-NEXT: ret float 0x7FFC000000000000
100+
; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float 0.000000e+00, float 0x7FF4000000000000)
101+
; CHECK-NEXT: ret float [[MIN]]
101102
;
102103
%min = call float @llvm.minnum.f32(float 0.0, float 0x7FF4000000000000)
103104
ret float %min
104105
}
105106

106107
define float @minnum_float_snan_p0() {
107108
; CHECK-LABEL: @minnum_float_snan_p0(
108-
; CHECK-NEXT: ret float 0x7FFC000000000000
109+
; CHECK-NEXT: [[MIN:%.*]] = call float @llvm.minnum.f32(float 0x7FF4000000000000, float 0.000000e+00)
110+
; CHECK-NEXT: ret float [[MIN]]
109111
;
110112
%min = call float @llvm.minnum.f32(float 0x7FF4000000000000, float 0.0)
111113
ret float %min
@@ -205,15 +207,17 @@ define float @maxnum_float_qnan_p0() {
205207

206208
define float @maxnum_float_p0_snan() {
207209
; CHECK-LABEL: @maxnum_float_p0_snan(
208-
; CHECK-NEXT: ret float 0x7FFC000000000000
210+
; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float 0.000000e+00, float 0x7FF4000000000000)
211+
; CHECK-NEXT: ret float [[MAX]]
209212
;
210213
%max = call float @llvm.maxnum.f32(float 0.0, float 0x7FF4000000000000)
211214
ret float %max
212215
}
213216

214217
define float @maxnum_float_snan_p0() {
215218
; CHECK-LABEL: @maxnum_float_snan_p0(
216-
; CHECK-NEXT: ret float 0x7FFC000000000000
219+
; CHECK-NEXT: [[MAX:%.*]] = call float @llvm.maxnum.f32(float 0x7FF4000000000000, float 0.000000e+00)
220+
; CHECK-NEXT: ret float [[MAX]]
217221
;
218222
%max = call float @llvm.maxnum.f32(float 0x7FF4000000000000, float 0.0)
219223
ret float %max

0 commit comments

Comments
 (0)