Skip to content

Commit f99290e

Browse files
committed
[InstCombine] Canonicalize (sub nuw Mask, X) -> (xor Mask, X)
https://alive2.llvm.org/ce/z/zGwUBp This came up when trying to implement: `(ctlz Pow2)` -> `(sub/xor BW - 1, Log2(Pow2))` https://github.com/dtcxzyw/llvm-opt-benchmark/pull/1944/files#diff-b13a246c0599d6b2255fae8707bb4e36bf28221ff67f7ad54d169f7bd5ba4e22R13996
1 parent 0d9c027 commit f99290e

File tree

9 files changed

+38
-26
lines changed

9 files changed

+38
-26
lines changed

llvm/include/llvm/Transforms/InstCombine/InstCombiner.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -236,6 +236,11 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner {
236236
return isFreeToInvert(V, WillInvertAllUses, Unused);
237237
}
238238

239+
/// If `Not` is true, returns true if V is a negative power of 2 or zero.
240+
/// If `Not` is false, returns true if V is a Mask or zero.
241+
bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
242+
unsigned Depth = 0);
243+
239244
/// Given i1 V, can every user of V be freely adapted if V is changed to !V ?
240245
/// InstCombine's freelyInvertAllUsersOf() must be kept in sync with this fn.
241246
/// NOTE: for Instructions only!

llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2780,6 +2780,12 @@ Instruction *InstCombinerImpl::visitSub(BinaryOperator &I) {
27802780
if (Instruction *Res = foldBinOpOfSelectAndCastOfSelectCondition(I))
27812781
return Res;
27822782

2783+
// Canonicalize (sub nuw Mask, X) -> (xor Mask, X)
2784+
if (I.hasNoUnsignedWrap() &&
2785+
isMaskOrZero(Op0, /*Not=*/false,
2786+
getSimplifyQuery().getWithInstruction(&I)))
2787+
return BinaryOperator::CreateXor(Op0, Op1);
2788+
27832789
return TryToNarrowDeduceFlags();
27842790
}
27852791

llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4326,8 +4326,8 @@ Instruction *InstCombinerImpl::foldSelectICmp(CmpPredicate Pred, SelectInst *SI,
43264326
}
43274327

43284328
// Returns whether V is a Mask ((X + 1) & X == 0) or ~Mask (-Pow2OrZero)
4329-
static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
4330-
unsigned Depth = 0) {
4329+
bool InstCombiner::isMaskOrZero(const Value *V, bool Not,
4330+
const SimplifyQuery &Q, unsigned Depth) {
43314331
if (Not ? match(V, m_NegatedPower2OrZero()) : match(V, m_LowBitMaskOrZero()))
43324332
return true;
43334333
if (V->getType()->getScalarSizeInBits() == 1)
@@ -4381,14 +4381,14 @@ static bool isMaskOrZero(const Value *V, bool Not, const SimplifyQuery &Q,
43814381
case Instruction::Add:
43824382
// Pow2 - 1 is a Mask.
43834383
if (!Not && match(I->getOperand(1), m_AllOnes()))
4384-
return isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
4385-
Depth, Q.AC, Q.CxtI, Q.DT);
4384+
return ::isKnownToBeAPowerOfTwo(I->getOperand(0), Q.DL, /*OrZero*/ true,
4385+
Depth, Q.AC, Q.CxtI, Q.DT);
43864386
break;
43874387
case Instruction::Sub:
43884388
// -Pow2 is a ~Mask.
43894389
if (Not && match(I->getOperand(0), m_Zero()))
4390-
return isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
4391-
Depth, Q.AC, Q.CxtI, Q.DT);
4390+
return ::isKnownToBeAPowerOfTwo(I->getOperand(1), Q.DL, /*OrZero*/ true,
4391+
Depth, Q.AC, Q.CxtI, Q.DT);
43924392
break;
43934393
case Instruction::Call: {
43944394
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
@@ -4497,13 +4497,13 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,
44974497
if (match(Op0, m_c_And(m_Specific(Op1), m_Value(M)))) {
44984498
X = Op1;
44994499
// Look for: x & Mask pred x
4500-
if (isMaskOrZero(M, /*Not=*/false, Q)) {
4500+
if (IC.isMaskOrZero(M, /*Not=*/false, Q)) {
45014501
return !ICmpInst::isSigned(Pred) ||
45024502
(match(M, m_NonNegative()) || isKnownNonNegative(M, Q));
45034503
}
45044504

45054505
// Look for: x & ~Mask pred ~Mask
4506-
if (isMaskOrZero(X, /*Not=*/true, Q)) {
4506+
if (IC.isMaskOrZero(X, /*Not=*/true, Q)) {
45074507
return !ICmpInst::isSigned(Pred) || isKnownNonZero(X, Q);
45084508
}
45094509
return false;
@@ -4513,7 +4513,7 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,
45134513

45144514
auto Check = [&]() {
45154515
// Look for: ~x | Mask == -1
4516-
if (isMaskOrZero(M, /*Not=*/false, Q)) {
4516+
if (IC.isMaskOrZero(M, /*Not=*/false, Q)) {
45174517
if (Value *NotX =
45184518
IC.getFreelyInverted(X, X->hasOneUse(), &IC.Builder)) {
45194519
X = NotX;
@@ -4531,7 +4531,7 @@ static Value *foldICmpWithLowBitMaskedVal(CmpPredicate Pred, Value *Op0,
45314531
match(Op0, m_OneUse(m_And(m_Value(X), m_Value(M))))) {
45324532
auto Check = [&]() {
45334533
// Look for: x & ~Mask == 0
4534-
if (isMaskOrZero(M, /*Not=*/true, Q)) {
4534+
if (IC.isMaskOrZero(M, /*Not=*/true, Q)) {
45354535
if (Value *NotM =
45364536
IC.getFreelyInverted(M, M->hasOneUse(), &IC.Builder)) {
45374537
M = NotM;

llvm/test/Transforms/InstCombine/sub-xor.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ define i32 @xor_dominating_cond(i32 %x) {
166166
; CHECK-NEXT: [[COND:%.*]] = icmp ult i32 [[X:%.*]], 256
167167
; CHECK-NEXT: br i1 [[COND]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
168168
; CHECK: if.then:
169-
; CHECK-NEXT: [[A:%.*]] = sub nuw nsw i32 255, [[X]]
169+
; CHECK-NEXT: [[A:%.*]] = xor i32 [[X]], 255
170170
; CHECK-NEXT: ret i32 [[A]]
171171
; CHECK: if.end:
172172
; CHECK-NEXT: ret i32 [[X]]

llvm/test/Transforms/InstCombine/sub.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2204,9 +2204,9 @@ define i8 @shrink_sub_from_constant_lowbits2(i8 %x) {
22042204

22052205
define <2 x i8> @shrink_sub_from_constant_lowbits3(<2 x i8> %x) {
22062206
; CHECK-LABEL: @shrink_sub_from_constant_lowbits3(
2207-
; CHECK-NEXT: [[X0000:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 4)
2208-
; CHECK-NEXT: [[SUB:%.*]] = sub nuw <2 x i8> splat (i8 24), [[X0000]]
2209-
; CHECK-NEXT: [[R:%.*]] = lshr exact <2 x i8> [[SUB]], splat (i8 3)
2207+
; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i8> [[X:%.*]], splat (i8 1)
2208+
; CHECK-NEXT: [[SUB:%.*]] = and <2 x i8> [[TMP1]], splat (i8 30)
2209+
; CHECK-NEXT: [[R:%.*]] = xor <2 x i8> [[SUB]], splat (i8 3)
22102210
; CHECK-NEXT: ret <2 x i8> [[R]]
22112211
;
22122212
%x0000 = shl <2 x i8> %x, <i8 4, i8 4> ; 4 low bits are known zero

llvm/test/Transforms/InstCombine/vec_demanded_elts-inseltpoison.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@ define <3 x i8> @shuf_add(<3 x i8> %x) {
215215

216216
define <3 x i8> @shuf_sub(<3 x i8> %x) {
217217
; CHECK-LABEL: @shuf_sub(
218-
; CHECK-NEXT: [[BO:%.*]] = sub nuw <3 x i8> <i8 1, i8 poison, i8 3>, [[X:%.*]]
218+
; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], <i8 1, i8 poison, i8 3>
219219
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> <i32 0, i32 poison, i32 2>
220220
; CHECK-NEXT: ret <3 x i8> [[R]]
221221
;

llvm/test/Transforms/InstCombine/vec_demanded_elts.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ define <3 x i8> @shuf_add(<3 x i8> %x) {
218218

219219
define <3 x i8> @shuf_sub(<3 x i8> %x) {
220220
; CHECK-LABEL: @shuf_sub(
221-
; CHECK-NEXT: [[BO:%.*]] = sub nuw <3 x i8> <i8 1, i8 poison, i8 3>, [[X:%.*]]
221+
; CHECK-NEXT: [[BO:%.*]] = xor <3 x i8> [[X:%.*]], <i8 1, i8 poison, i8 3>
222222
; CHECK-NEXT: [[R:%.*]] = shufflevector <3 x i8> [[BO]], <3 x i8> poison, <3 x i32> <i32 0, i32 poison, i32 2>
223223
; CHECK-NEXT: ret <3 x i8> [[R]]
224224
;

llvm/test/Transforms/PhaseOrdering/X86/pr88239.ll

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,19 @@ define void @foo(ptr noalias noundef %0, ptr noalias noundef %1) optsize {
88
; CHECK-LABEL: define void @foo(
99
; CHECK-SAME: ptr noalias nocapture noundef readonly [[TMP0:%.*]], ptr noalias nocapture noundef writeonly [[TMP1:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
1010
; CHECK-NEXT: vector.ph:
11-
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 -28
1211
; CHECK-NEXT: br label [[TMP4:%.*]]
1312
; CHECK: vector.body:
1413
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[TMP2:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[TMP4]] ]
15-
; CHECK-NEXT: [[TMP3:%.*]] = sub nuw nsw i64 255, [[INDVARS_IV]]
16-
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[INVARIANT_GEP]], i64 [[TMP3]]
17-
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = load <8 x i32>, ptr [[GEP]], align 4
14+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[TMP2]] ], [ [[VEC_IND_NEXT:%.*]], [[TMP4]] ]
15+
; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i64> [[VEC_IND]], splat (i64 4294967295)
16+
; CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i64> [[TMP6]], splat (i64 255)
17+
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP0]], <8 x i64> [[TMP3]]
18+
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = tail call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[TMP7]], i32 4, <8 x i1> splat (i1 true), <8 x i32> poison)
1819
; CHECK-NEXT: [[TMP5:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER]], splat (i32 5)
19-
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <8 x i32> [[TMP5]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2020
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[TMP1]], i64 [[INDVARS_IV]]
21-
; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP10]], align 4
21+
; CHECK-NEXT: store <8 x i32> [[TMP5]], ptr [[TMP10]], align 4
2222
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw i64 [[INDVARS_IV]], 8
23+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
2324
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 256
2425
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[MIDDLE_BLOCK:%.*]], label [[TMP4]], !llvm.loop [[LOOP0:![0-9]+]]
2526
; CHECK: middle.block:

llvm/test/Transforms/PhaseOrdering/scev-custom-dl.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -141,14 +141,14 @@ define i32 @test_loop_idiom_recogize(i32 %x, i32 %y, ptr %lam, ptr %alp) nounwin
141141
; CHECK-NEXT: Classifying expressions for: @test_loop_idiom_recogize
142142
; CHECK-NEXT: %indvar = phi i32 [ 0, %bb1.thread ], [ %indvar.next, %bb1 ]
143143
; CHECK-NEXT: --> {0,+,1}<nuw><nsw><%bb1> U: [0,256) S: [0,256) Exits: 255 LoopDispositions: { %bb1: Computable }
144-
; CHECK-NEXT: %i.0.reg2mem.0 = sub nuw nsw i32 255, %indvar
145-
; CHECK-NEXT: --> {255,+,-1}<nsw><%bb1> U: [0,256) S: [0,256) Exits: 0 LoopDispositions: { %bb1: Computable }
144+
; CHECK-NEXT: %i.0.reg2mem.0 = xor i32 %indvar, 255
145+
; CHECK-NEXT: --> %i.0.reg2mem.0 U: [0,-2147483648) S: [0,-2147483648) Exits: 0 LoopDispositions: { %bb1: Variant }
146146
; CHECK-NEXT: %0 = getelementptr i32, ptr %alp, i32 %i.0.reg2mem.0
147-
; CHECK-NEXT: --> {(1020 + %alp),+,-4}<nw><%bb1> U: full-set S: full-set Exits: %alp LoopDispositions: { %bb1: Computable }
147+
; CHECK-NEXT: --> ((4 * %i.0.reg2mem.0) + %alp) U: full-set S: full-set Exits: %alp LoopDispositions: { %bb1: Variant }
148148
; CHECK-NEXT: %1 = load i32, ptr %0, align 4
149149
; CHECK-NEXT: --> %1 U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %bb1: Variant }
150150
; CHECK-NEXT: %2 = getelementptr i32, ptr %lam, i32 %i.0.reg2mem.0
151-
; CHECK-NEXT: --> {(1020 + %lam),+,-4}<nw><%bb1> U: full-set S: full-set Exits: %lam LoopDispositions: { %bb1: Computable }
151+
; CHECK-NEXT: --> ((4 * %i.0.reg2mem.0) + %lam) U: full-set S: full-set Exits: %lam LoopDispositions: { %bb1: Variant }
152152
; CHECK-NEXT: %indvar.next = add nuw nsw i32 %indvar, 1
153153
; CHECK-NEXT: --> {1,+,1}<nuw><nsw><%bb1> U: [1,257) S: [1,257) Exits: 256 LoopDispositions: { %bb1: Computable }
154154
; CHECK-NEXT: %tmp10 = mul i32 %x, 255

0 commit comments

Comments
 (0)