Skip to content

Commit 17efa57

Browse files
[InstSimplify] Optimize maximumnum and minimumnum (#139581)
Add support for the new maximumnum and minimumnum intrinsics in various optimizations in InstSimplify. Also, change the behavior of optimizing maxnum(sNaN, x) to simplify to qNaN instead of x to better match the LLVM IR spec, and add more tests for sNaN behavior for all 3 max/min intrinsic types.
1 parent a798a10 commit 17efa57

File tree

6 files changed

+297
-180
lines changed

6 files changed

+297
-180
lines changed

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 137 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -6473,7 +6473,8 @@ static Value *foldMinMaxSharedOp(Intrinsic::ID IID, Value *Op0, Value *Op1) {
64736473
static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0,
64746474
Value *Op1) {
64756475
assert((IID == Intrinsic::maxnum || IID == Intrinsic::minnum ||
6476-
IID == Intrinsic::maximum || IID == Intrinsic::minimum) &&
6476+
IID == Intrinsic::maximum || IID == Intrinsic::minimum ||
6477+
IID == Intrinsic::maximumnum || IID == Intrinsic::minimumnum) &&
64776478
"Unsupported intrinsic");
64786479

64796480
auto *M0 = dyn_cast<IntrinsicInst>(Op0);
@@ -6512,6 +6513,82 @@ static Value *foldMinimumMaximumSharedOp(Intrinsic::ID IID, Value *Op0,
65126513
return nullptr;
65136514
}
65146515

6516+
enum class MinMaxOptResult {
6517+
CannotOptimize = 0,
6518+
UseNewConstVal = 1,
6519+
UseOtherVal = 2,
6520+
// For undef/poison, we can choose to either propagate undef/poison or
6521+
// use the LHS value depending on what will allow more optimization.
6522+
UseEither = 3
6523+
};
6524+
// Get the optimized value for a min/max instruction with a single constant
6525+
// input (either undef or scalar constantFP). The result may indicate to
6526+
// use the non-const LHS value, use a new constant value instead (with NaNs
6527+
// quieted), or to choose either option in the case of undef/poison.
6528+
static MinMaxOptResult OptimizeConstMinMax(const Constant *RHSConst,
6529+
const Intrinsic::ID IID,
6530+
const CallBase *Call,
6531+
Constant **OutNewConstVal) {
6532+
assert(OutNewConstVal != nullptr);
6533+
6534+
bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
6535+
bool PropagateSNaN = IID == Intrinsic::minnum || IID == Intrinsic::maxnum;
6536+
bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
6537+
IID == Intrinsic::minimumnum;
6538+
6539+
// min/max(x, undef/poison) -> either x or undef/poison
6540+
if (isa<UndefValue>(RHSConst)) {
6541+
*OutNewConstVal = const_cast<Constant *>(RHSConst);
6542+
return MinMaxOptResult::UseEither;
6543+
}
6544+
6545+
const ConstantFP *CFP = dyn_cast<ConstantFP>(RHSConst);
6546+
if (!CFP)
6547+
return MinMaxOptResult::CannotOptimize;
6548+
APFloat CAPF = CFP->getValueAPF();
6549+
6550+
// minnum(x, qnan) -> x
6551+
// maxnum(x, qnan) -> x
6552+
// minnum(x, snan) -> qnan
6553+
// maxnum(x, snan) -> qnan
6554+
// minimum(X, nan) -> qnan
6555+
// maximum(X, nan) -> qnan
6556+
// minimumnum(X, nan) -> x
6557+
// maximumnum(X, nan) -> x
6558+
if (CAPF.isNaN()) {
6559+
if (PropagateNaN || (PropagateSNaN && CAPF.isSignaling())) {
6560+
*OutNewConstVal = ConstantFP::get(CFP->getType(), CAPF.makeQuiet());
6561+
return MinMaxOptResult::UseNewConstVal;
6562+
}
6563+
return MinMaxOptResult::UseOtherVal;
6564+
}
6565+
6566+
if (CAPF.isInfinity() || (Call && Call->hasNoInfs() && CAPF.isLargest())) {
6567+
// minnum(X, -inf) -> -inf (ignoring sNaN -> qNaN propagation)
6568+
// maxnum(X, +inf) -> +inf (ignoring sNaN -> qNaN propagation)
6569+
// minimum(X, -inf) -> -inf if nnan
6570+
// maximum(X, +inf) -> +inf if nnan
6571+
// minimumnum(X, -inf) -> -inf
6572+
// maximumnum(X, +inf) -> +inf
6573+
if (CAPF.isNegative() == IsMin &&
6574+
(!PropagateNaN || (Call && Call->hasNoNaNs()))) {
6575+
*OutNewConstVal = const_cast<Constant *>(RHSConst);
6576+
return MinMaxOptResult::UseNewConstVal;
6577+
}
6578+
6579+
// minnum(X, +inf) -> X if nnan
6580+
// maxnum(X, -inf) -> X if nnan
6581+
// minimum(X, +inf) -> X (ignoring quieting of sNaNs)
6582+
// maximum(X, -inf) -> X (ignoring quieting of sNaNs)
6583+
// minimumnum(X, +inf) -> X if nnan
6584+
// maximumnum(X, -inf) -> X if nnan
6585+
if (CAPF.isNegative() != IsMin &&
6586+
(PropagateNaN || (Call && Call->hasNoNaNs())))
6587+
return MinMaxOptResult::UseOtherVal;
6588+
}
6589+
return MinMaxOptResult::CannotOptimize;
6590+
}
6591+
65156592
Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
65166593
Value *Op0, Value *Op1,
65176594
const SimplifyQuery &Q,
@@ -6780,49 +6857,73 @@ Value *llvm::simplifyBinaryIntrinsic(Intrinsic::ID IID, Type *ReturnType,
67806857
case Intrinsic::maxnum:
67816858
case Intrinsic::minnum:
67826859
case Intrinsic::maximum:
6783-
case Intrinsic::minimum: {
6784-
// If the arguments are the same, this is a no-op.
6860+
case Intrinsic::minimum:
6861+
case Intrinsic::maximumnum:
6862+
case Intrinsic::minimumnum: {
6863+
// In several cases here, we deviate from exact IEEE 754 semantics
6864+
// to enable optimizations (as allowed by the LLVM IR spec).
6865+
//
6866+
// For instance, we may return one of the arguments unmodified instead of
6867+
// inserting an llvm.canonicalize to transform input sNaNs into qNaNs,
6868+
// or may assume all NaN inputs are qNaNs.
6869+
6870+
// If the arguments are the same, this is a no-op (ignoring NaN quieting)
67856871
if (Op0 == Op1)
67866872
return Op0;
67876873

67886874
// Canonicalize constant operand as Op1.
67896875
if (isa<Constant>(Op0))
67906876
std::swap(Op0, Op1);
67916877

6792-
// If an argument is undef, return the other argument.
6793-
if (Q.isUndefValue(Op1))
6794-
return Op0;
6878+
if (Constant *C = dyn_cast<Constant>(Op1)) {
6879+
MinMaxOptResult OptResult = MinMaxOptResult::CannotOptimize;
6880+
Constant *NewConst = nullptr;
6881+
6882+
if (VectorType *VTy = dyn_cast<VectorType>(C->getType())) {
6883+
ElementCount ElemCount = VTy->getElementCount();
6884+
6885+
if (Constant *SplatVal = C->getSplatValue()) {
6886+
// Handle splat vectors (including scalable vectors)
6887+
OptResult = OptimizeConstMinMax(SplatVal, IID, Call, &NewConst);
6888+
if (OptResult == MinMaxOptResult::UseNewConstVal)
6889+
NewConst = ConstantVector::getSplat(ElemCount, NewConst);
6890+
6891+
} else if (ElemCount.isFixed()) {
6892+
// Storage to build up new const return value (with NaNs quieted)
6893+
SmallVector<Constant *, 16> NewC(ElemCount.getFixedValue());
6894+
6895+
// Check elementwise whether we can optimize to either a constant
6896+
// value or return the LHS value. We cannot mix and match LHS +
6897+
// constant elements, as this would require inserting a new
6898+
// shufflevector instruction, which is not allowed in simplifyBinaryIntrinsic.
6899+
OptResult = MinMaxOptResult::UseEither;
6900+
for (unsigned i = 0; i != ElemCount.getFixedValue(); ++i) {
6901+
auto ElemResult = OptimizeConstMinMax(C->getAggregateElement(i),
6902+
IID, Call, &NewConst);
6903+
if (ElemResult == MinMaxOptResult::CannotOptimize ||
6904+
(ElemResult != OptResult &&
6905+
OptResult != MinMaxOptResult::UseEither &&
6906+
ElemResult != MinMaxOptResult::UseEither)) {
6907+
OptResult = MinMaxOptResult::CannotOptimize;
6908+
break;
6909+
}
6910+
NewC[i] = NewConst;
6911+
if (ElemResult != MinMaxOptResult::UseEither)
6912+
OptResult = ElemResult;
6913+
}
6914+
if (OptResult == MinMaxOptResult::UseNewConstVal)
6915+
NewConst = ConstantVector::get(NewC);
6916+
}
6917+
} else {
6918+
// Handle scalar inputs
6919+
OptResult = OptimizeConstMinMax(C, IID, Call, &NewConst);
6920+
}
67956921

6796-
bool PropagateNaN = IID == Intrinsic::minimum || IID == Intrinsic::maximum;
6797-
bool IsMin = IID == Intrinsic::minimum || IID == Intrinsic::minnum;
6798-
6799-
// minnum(X, nan) -> X
6800-
// maxnum(X, nan) -> X
6801-
// minimum(X, nan) -> nan
6802-
// maximum(X, nan) -> nan
6803-
if (match(Op1, m_NaN()))
6804-
return PropagateNaN ? propagateNaN(cast<Constant>(Op1)) : Op0;
6805-
6806-
// In the following folds, inf can be replaced with the largest finite
6807-
// float, if the ninf flag is set.
6808-
const APFloat *C;
6809-
if (match(Op1, m_APFloat(C)) &&
6810-
(C->isInfinity() || (Call && Call->hasNoInfs() && C->isLargest()))) {
6811-
// minnum(X, -inf) -> -inf
6812-
// maxnum(X, +inf) -> +inf
6813-
// minimum(X, -inf) -> -inf if nnan
6814-
// maximum(X, +inf) -> +inf if nnan
6815-
if (C->isNegative() == IsMin &&
6816-
(!PropagateNaN || (Call && Call->hasNoNaNs())))
6817-
return ConstantFP::get(ReturnType, *C);
6818-
6819-
// minnum(X, +inf) -> X if nnan
6820-
// maxnum(X, -inf) -> X if nnan
6821-
// minimum(X, +inf) -> X
6822-
// maximum(X, -inf) -> X
6823-
if (C->isNegative() != IsMin &&
6824-
(PropagateNaN || (Call && Call->hasNoNaNs())))
6825-
return Op0;
6922+
if (OptResult == MinMaxOptResult::UseOtherVal ||
6923+
OptResult == MinMaxOptResult::UseEither)
6924+
return Op0; // Return the other arg (ignoring NaN quieting)
6925+
else if (OptResult == MinMaxOptResult::UseNewConstVal)
6926+
return NewConst;
68266927
}
68276928

68286929
// Min/max of the same operation with common operand:

llvm/lib/Analysis/ValueTracking.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9095,6 +9095,10 @@ Intrinsic::ID llvm::getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID) {
90959095
case Intrinsic::minimum: return Intrinsic::maximum;
90969096
case Intrinsic::maxnum: return Intrinsic::minnum;
90979097
case Intrinsic::minnum: return Intrinsic::maxnum;
9098+
case Intrinsic::maximumnum:
9099+
return Intrinsic::minimumnum;
9100+
case Intrinsic::minimumnum:
9101+
return Intrinsic::maximumnum;
90989102
default: llvm_unreachable("Unexpected intrinsic");
90999103
}
91009104
}

llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -497,12 +497,10 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace
497497
ret void
498498
}
499499

500-
; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.
500+
; FIXME: Should there be more checks here? minnum with sNaN operand is simplified to qNaN.
501501

502502
; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
503-
; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
504-
; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
505-
; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
503+
; GCN: v_mov_b32_e32 v{{.+}}, 0x7fc00000
506504
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) {
507505
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
508506
%gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id

llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1949,8 +1949,7 @@ define float @v_fneg_self_minimumnum_f32_ieee(float %a) #0 {
19491949
; GCN-LABEL: v_fneg_self_minimumnum_f32_ieee:
19501950
; GCN: ; %bb.0:
19511951
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1952-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
1953-
; GCN-NEXT: v_max_f32_e32 v0, v0, v0
1952+
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
19541953
; GCN-NEXT: s_setpc_b64 s[30:31]
19551954
%min = call float @llvm.minimumnum.f32(float %a, float %a)
19561955
%min.fneg = fneg float %min
@@ -1961,7 +1960,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 {
19611960
; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee:
19621961
; GCN: ; %bb.0:
19631962
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1964-
; GCN-NEXT: v_max_f32_e64 v0, -v0, -v0
1963+
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
19651964
; GCN-NEXT: s_setpc_b64 s[30:31]
19661965
%min = call float @llvm.minimumnum.f32(float %a, float %a)
19671966
%min.fneg = fneg float %min
@@ -2285,8 +2284,7 @@ define float @v_fneg_self_maximumnum_f32_ieee(float %a) #0 {
22852284
; GCN-LABEL: v_fneg_self_maximumnum_f32_ieee:
22862285
; GCN: ; %bb.0:
22872286
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2288-
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
2289-
; GCN-NEXT: v_min_f32_e32 v0, v0, v0
2287+
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
22902288
; GCN-NEXT: s_setpc_b64 s[30:31]
22912289
%max = call float @llvm.maximumnum.f32(float %a, float %a)
22922290
%max.fneg = fneg float %max
@@ -2297,7 +2295,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 {
22972295
; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee:
22982296
; GCN: ; %bb.0:
22992297
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2300-
; GCN-NEXT: v_min_f32_e64 v0, -v0, -v0
2298+
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
23012299
; GCN-NEXT: s_setpc_b64 s[30:31]
23022300
%max = call float @llvm.maximumnum.f32(float %a, float %a)
23032301
%max.fneg = fneg float %max

llvm/test/Transforms/InstCombine/AMDGPU/fmed3.ll

Lines changed: 11 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -269,42 +269,27 @@ define float @fmed3_constant_src2_1_f32(float %x, float %y) #1 {
269269
}
270270

271271
define float @fmed3_x_qnan0_qnan1_f32(float %x) #1 {
272-
; IEEE1-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
273-
; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
274-
; IEEE1-NEXT: ret float [[X]]
275-
;
276-
; IEEE0-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
277-
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
278-
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
279-
; IEEE0-NEXT: ret float [[MED3]]
272+
; CHECK-LABEL: define float @fmed3_x_qnan0_qnan1_f32(
273+
; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
274+
; CHECK-NEXT: ret float [[X]]
280275
;
281276
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
282277
ret float %med3
283278
}
284279

285280
define float @fmed3_qnan0_x_qnan1_f32(float %x) #1 {
286-
; IEEE1-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
287-
; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
288-
; IEEE1-NEXT: ret float [[X]]
289-
;
290-
; IEEE0-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
291-
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
292-
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
293-
; IEEE0-NEXT: ret float [[MED3]]
281+
; CHECK-LABEL: define float @fmed3_qnan0_x_qnan1_f32(
282+
; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
283+
; CHECK-NEXT: ret float [[X]]
294284
;
295285
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
296286
ret float %med3
297287
}
298288

299289
define float @fmed3_qnan0_qnan1_x_f32(float %x) #1 {
300-
; IEEE1-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
301-
; IEEE1-SAME: float [[X:%.*]]) #[[ATTR1]] {
302-
; IEEE1-NEXT: ret float [[X]]
303-
;
304-
; IEEE0-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
305-
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
306-
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF8002000000000)
307-
; IEEE0-NEXT: ret float [[MED3]]
290+
; CHECK-LABEL: define float @fmed3_qnan0_qnan1_x_f32(
291+
; CHECK-SAME: float [[X:%.*]]) #[[ATTR1]] {
292+
; CHECK-NEXT: ret float [[X]]
308293
;
309294
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
310295
ret float %med3
@@ -448,8 +433,7 @@ define float @fmed3_snan1_x_snan2_f32(float %x) #1 {
448433
;
449434
; IEEE0-LABEL: define float @fmed3_snan1_x_snan2_f32(
450435
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
451-
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000)
452-
; IEEE0-NEXT: ret float [[MED3]]
436+
; IEEE0-NEXT: ret float [[X]]
453437
;
454438
%med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF0000020000000, float %x, float 0x7FF0000040000000)
455439
ret float %med3
@@ -462,8 +446,7 @@ define float @fmed3_x_snan1_snan2_f32(float %x) #1 {
462446
;
463447
; IEEE0-LABEL: define float @fmed3_x_snan1_snan2_f32(
464448
; IEEE0-SAME: float [[X:%.*]]) #[[ATTR1]] {
465-
; IEEE0-NEXT: [[MED3:%.*]] = call float @llvm.minimumnum.f32(float [[X]], float 0x7FF0000040000000)
466-
; IEEE0-NEXT: ret float [[MED3]]
449+
; IEEE0-NEXT: ret float [[X]]
467450
;
468451
%med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF0000020000000, float 0x7FF0000040000000)
469452
ret float %med3

0 commit comments

Comments
 (0)