Skip to content

Commit f2a801b

Browse files
committed
[InstCombine] Invert is.fpclass mask operand, when profitable
The `@llvm.is.fpclass` intrinsic is matched and generated by the InstCombine pass. When the number of set mask bits is greater than the number of unset bits, it is profitable to replace the `is.fpclass(x, mask)` intrinsic call with the equivalent `!is.fpclass(x, ~mask)` sequence. The following IR snippets are semantically equivalent:

```LLVM
define i1 @src(float %src) {
  ; 639 == 0b1001111111
  %class = call i1 @llvm.is.fpclass.f32(float %src, i32 639)
  ret i1 %class
}

define i1 @tgt(float %src) {
  ; 384 == 0b0110000000 == ~0b1001111111 & 0x3ff == ~639 & 0x3ff
  %class = call i1 @llvm.is.fpclass.f32(float %src, i32 384)
  %not = xor i1 %class, true
  ret i1 %not
}
```

However, the code generated for the second IR sequence is more efficient, at least on some targets.

References:
* https://alive2.llvm.org/ce/z/kkZhDt
* https://godbolt.org/z/5WE8Wb3vz
1 parent f590963 commit f2a801b

File tree

7 files changed

+318
-118
lines changed

7 files changed

+318
-118
lines changed

llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp

Lines changed: 43 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,9 +1522,21 @@ Value *InstCombinerImpl::foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS,
15221522
if (ClassValLHS == ClassValRHS) {
15231523
unsigned CombinedMask = IsAnd ? (ClassMaskLHS & ClassMaskRHS)
15241524
: (ClassMaskLHS | ClassMaskRHS);
1525-
return Builder.CreateIntrinsic(
1526-
Intrinsic::is_fpclass, {ClassValLHS->getType()},
1527-
{ClassValLHS, Builder.getInt32(CombinedMask)});
1525+
unsigned InverseCombinedMask = ~CombinedMask & fcAllFlags;
1526+
1527+
// If the number of bits set in the combined mask is greater than the
1528+
// number of the unset bits, it is more efficient to use the inverse
1529+
// mask and invert the result.
1530+
bool IsInverse = popcount(CombinedMask) > popcount(InverseCombinedMask);
1531+
auto *MaskVal =
1532+
Builder.getInt32(IsInverse ? InverseCombinedMask : CombinedMask);
1533+
1534+
auto *II = Builder.CreateIntrinsic(Intrinsic::is_fpclass,
1535+
{ClassValLHS->getType()},
1536+
{ClassValLHS, MaskVal});
1537+
if (IsInverse)
1538+
return Builder.CreateNot(II);
1539+
return II;
15281540
}
15291541
}
15301542
}
@@ -1610,10 +1622,25 @@ Instruction *InstCombinerImpl::foldLogicOfIsFPClass(BinaryOperator &BO,
16101622
bool IsRHSClass =
16111623
match(Op1, m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
16121624
m_Value(ClassVal1), m_ConstantInt(ClassMask1))));
1613-
if ((((IsLHSClass || matchIsFPClassLikeFCmp(Op0, ClassVal0, ClassMask0)) &&
1614-
(IsRHSClass || matchIsFPClassLikeFCmp(Op1, ClassVal1, ClassMask1)))) &&
1625+
1626+
bool IsLHSInverseClass =
1627+
match(Op0, m_OneUse(m_Not(m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
1628+
m_Value(ClassVal0), m_ConstantInt(ClassMask0))))));
1629+
bool IsRHSInverseClass =
1630+
match(Op1, m_OneUse(m_Not(m_OneUse(m_Intrinsic<Intrinsic::is_fpclass>(
1631+
m_Value(ClassVal1), m_ConstantInt(ClassMask1))))));
1632+
1633+
if ((((IsLHSClass || IsLHSInverseClass ||
1634+
matchIsFPClassLikeFCmp(Op0, ClassVal0, ClassMask0)) &&
1635+
(IsRHSClass || IsRHSInverseClass ||
1636+
matchIsFPClassLikeFCmp(Op1, ClassVal1, ClassMask1)))) &&
16151637
ClassVal0 == ClassVal1) {
16161638
unsigned NewClassMask;
1639+
if (IsLHSInverseClass)
1640+
ClassMask0 = ~ClassMask0 & fcAllFlags;
1641+
if (IsRHSInverseClass)
1642+
ClassMask1 = ~ClassMask1 & fcAllFlags;
1643+
16171644
switch (BO.getOpcode()) {
16181645
case Instruction::And:
16191646
NewClassMask = ClassMask0 & ClassMask1;
@@ -4651,10 +4678,17 @@ Instruction *InstCombinerImpl::foldNot(BinaryOperator &I) {
46514678

46524679
if (II->getIntrinsicID() == Intrinsic::is_fpclass) {
46534680
ConstantInt *ClassMask = cast<ConstantInt>(II->getArgOperand(1));
4654-
II->setArgOperand(
4655-
1, ConstantInt::get(ClassMask->getType(),
4656-
~ClassMask->getZExtValue() & fcAllFlags));
4657-
return replaceInstUsesWith(I, II);
4681+
auto ClassMaskValue = ClassMask->getZExtValue();
4682+
auto InverseMaskValue = ~ClassMaskValue & fcAllFlags;
4683+
4684+
// If the number of set bits in the class mask is less than the number of
4685+
// set bits in the inverse mask, it's more efficient to keep the "not"
4686+
// instruction instead of inverting the class mask.
4687+
if (popcount(ClassMaskValue) > popcount(InverseMaskValue)) {
4688+
II->setArgOperand(
4689+
1, ConstantInt::get(ClassMask->getType(), InverseMaskValue));
4690+
return replaceInstUsesWith(I, II);
4691+
}
46584692
}
46594693
}
46604694

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,6 +1050,20 @@ Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
10501050
if (Mask == Known.KnownFPClasses)
10511051
return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
10521052

1053+
// If the number of set bits in the mask is greater than the number of the
1054+
// unset bits, it's more efficient to invert the mask and the intrinsic
1055+
// result:
1056+
// is.fpclass(x, mask) -> !is.fpclass(x, ~mask)
1057+
//
1058+
auto InverseMask = ~Mask & fcAllFlags;
1059+
if (popcount<unsigned>(Mask) > popcount<unsigned>(InverseMask)) {
1060+
auto *NewII =
1061+
Builder.CreateIntrinsic(Intrinsic::is_fpclass, {Src0->getType()},
1062+
{Src0, Builder.getInt32(InverseMask)});
1063+
auto *Not = Builder.CreateNot(NewII);
1064+
return replaceInstUsesWith(II, Not);
1065+
}
1066+
10531067
return nullptr;
10541068
}
10551069

llvm/test/Transforms/InstCombine/combine-is.fpclass-and-fcmp.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,8 @@ define i1 @fcmp_issubnormal_and_class_finite(half %x) {
189189

190190
define i1 @class_inf_or_fcmp_issubnormal(half %x) {
191191
; CHECK-LABEL: @class_inf_or_fcmp_issubnormal(
192-
; CHECK-NEXT: [[OR:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 756)
192+
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 267)
193+
; CHECK-NEXT: [[OR:%.*]] = xor i1 [[TMP1]], true
193194
; CHECK-NEXT: ret i1 [[OR]]
194195
;
195196
%fabs = call half @llvm.fabs.f16(half %x)
@@ -289,7 +290,8 @@ define i1 @class_normal_or_fcmp_oeq_zero(half %x) {
289290

290291
define i1 @fcmp_ueq_zero_or_class_normal(half %x) {
291292
; CHECK-LABEL: @fcmp_ueq_zero_or_class_normal(
292-
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 363)
293+
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 660)
294+
; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
293295
; CHECK-NEXT: ret i1 [[CLASS]]
294296
;
295297
%ueq.inf = fcmp ueq half %x, 0.0
@@ -300,7 +302,8 @@ define i1 @fcmp_ueq_zero_or_class_normal(half %x) {
300302

301303
define i1 @class_normal_or_fcmp_ueq_zero(half %x) {
302304
; CHECK-LABEL: @class_normal_or_fcmp_ueq_zero(
303-
; CHECK-NEXT: [[CLASS:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 363)
305+
; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.is.fpclass.f16(half [[X:%.*]], i32 660)
306+
; CHECK-NEXT: [[CLASS:%.*]] = xor i1 [[TMP1]], true
304307
; CHECK-NEXT: ret i1 [[CLASS]]
305308
;
306309
%ueq.inf = fcmp ueq half %x, 0.0

0 commit comments

Comments
 (0)