From f4223031001b497b8f77ca722233b593e2231623 Mon Sep 17 00:00:00 2001 From: Mihail Mihov Date: Thu, 23 Oct 2025 16:47:14 +0200 Subject: [PATCH 1/2] [InstCombine] Add failing test for CTLZ -> CTTZ simplification --- llvm/test/Transforms/InstCombine/ctlz-cttz.ll | 155 ++++++++++++++++++ 1 file changed, 155 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/ctlz-cttz.ll diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll new file mode 100644 index 0000000000000..7ff2eac64c0c4 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll @@ -0,0 +1,155 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -S -passes=instcombine | FileCheck %s + +; ctlz(~i & (i - 1)) -> bitwidth - cttz(i, false) +define i8 @ctlz_to_sub_bw_cttz(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_poison(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_poison( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 true) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 true) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_different_add(i8 %a0) { +; 
CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_different_add( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], 1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, 1 + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_different_xor(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_different_xor( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], 1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, 1 + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +declare void @use(i8) + +define i8 @ctlz_to_sub_bw_cttz_multi_use_dec(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_dec( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: call void @use(i8 [[DEC]]) +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + call void @use(i8 %dec) + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_multi_use_not(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_not( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; 
CHECK-NEXT: call void @use(i8 [[NOT]]) +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + call void @use(i8 %not) + %and = and i8 %dec, %not + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_multi_use_and(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_and( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: call void @use(i8 [[AND]]) +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + %and = and i8 %dec, %not + call void @use(i8 %and) + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define i8 @ctlz_to_sub_bw_cttz_commute_and(i8 %a0) { +; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_commute_and( +; CHECK-SAME: i8 [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 +; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: ret i8 [[CLZ]] +; + %dec = add i8 %a0, -1 + %not = xor i8 %a0, -1 + %and = and i8 %not, %dec + %clz = tail call i8 @llvm.ctlz.i8(i8 %and, i1 false) + ret i8 %clz +} + +define <2 x i8> @ctlz_to_sub_bw_cttz_vec_splat(<2 x i8> %a0) { +; CHECK-LABEL: define <2 x i8> @ctlz_to_sub_bw_cttz_vec_splat( +; CHECK-SAME: <2 x i8> [[A0:%.*]]) { +; CHECK-NEXT: [[DEC:%.*]] = add <2 x i8> [[A0]], splat (i8 -1) +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i8> [[A0]], splat (i8 -1) +; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[DEC]], [[NOT]] +; CHECK-NEXT: [[CLZ:%.*]] = tail call 
range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[AND]], i1 false) +; CHECK-NEXT: ret <2 x i8> [[CLZ]] +; + %dec = add <2 x i8> %a0, <i8 -1, i8 -1> + %not = xor <2 x i8> %a0, <i8 -1, i8 -1> + %and = and <2 x i8> %dec, %not + %clz = tail call <2 x i8>@llvm.ctlz.v2i8(<2 x i8> %and, i1 false) + ret <2 x i8> %clz +} From 171ee2bb70984bd8d8ccbb00035629ddd5137383 Mon Sep 17 00:00:00 2001 From: Mihail Mihov Date: Thu, 23 Oct 2025 17:11:28 +0200 Subject: [PATCH 2/2] [InstCombine] Add CTLZ -> CTTZ simplification --- .../InstCombine/InstCombineCalls.cpp | 12 +++++++ llvm/test/Transforms/InstCombine/ctlz-cttz.ll | 34 +++++++------------ 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index e1e24a99d0474..3ce16b40d6cb6 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -585,6 +585,18 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC) { IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1); return BinaryOperator::CreateSub(ConstCtlz, X); } + + // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false) + if (Op0->hasOneUse() && + match(Op0, + m_c_And(m_Not(m_Value(X)), m_Add(m_Deferred(X), m_AllOnes())))) { + Type *Ty = II.getType(); + unsigned BitWidth = Ty->getScalarSizeInBits(); + auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty, + {X, IC.Builder.getFalse()}); + auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth)); + return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz)); + } } // cttz(Pow2) -> Log2(Pow2) diff --git a/llvm/test/Transforms/InstCombine/ctlz-cttz.ll b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll index 7ff2eac64c0c4..871fb342360fd 100644 --- a/llvm/test/Transforms/InstCombine/ctlz-cttz.ll +++ b/llvm/test/Transforms/InstCombine/ctlz-cttz.ll @@ -5,10 +5,8 @@ define i8 @ctlz_to_sub_bw_cttz(i8 %a0) { ; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz( ; 
CHECK-SAME: i8 [[A0:%.*]]) { -; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 -; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] -; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] ; CHECK-NEXT: ret i8 [[CLZ]] ; %dec = add i8 %a0, -1 @@ -21,10 +19,8 @@ define i8 @ctlz_to_sub_bw_cttz(i8 %a0) { define i8 @ctlz_to_sub_bw_cttz_poison(i8 %a0) { ; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_poison( ; CHECK-SAME: i8 [[A0:%.*]]) { -; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 -; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] -; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 true) +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] ; CHECK-NEXT: ret i8 [[CLZ]] ; %dec = add i8 %a0, -1 @@ -73,9 +69,8 @@ define i8 @ctlz_to_sub_bw_cttz_multi_use_dec(i8 %a0) { ; CHECK-SAME: i8 [[A0:%.*]]) { ; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 ; CHECK-NEXT: call void @use(i8 [[DEC]]) -; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] -; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] ; CHECK-NEXT: ret i8 [[CLZ]] ; %dec = add i8 %a0, -1 @@ -89,11 +84,10 @@ define i8 @ctlz_to_sub_bw_cttz_multi_use_dec(i8 %a0) { define i8 @ctlz_to_sub_bw_cttz_multi_use_not(i8 %a0) { ; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_multi_use_not( ; CHECK-SAME: i8 [[A0:%.*]]) { -; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 ; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 ; 
CHECK-NEXT: call void @use(i8 [[NOT]]) -; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] -; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] ; CHECK-NEXT: ret i8 [[CLZ]] ; %dec = add i8 %a0, -1 @@ -125,10 +119,8 @@ define i8 @ctlz_to_sub_bw_cttz_multi_use_and(i8 %a0) { define i8 @ctlz_to_sub_bw_cttz_commute_and(i8 %a0) { ; CHECK-LABEL: define i8 @ctlz_to_sub_bw_cttz_commute_and( ; CHECK-SAME: i8 [[A0:%.*]]) { -; CHECK-NEXT: [[DEC:%.*]] = add i8 [[A0]], -1 -; CHECK-NEXT: [[NOT:%.*]] = xor i8 [[A0]], -1 -; CHECK-NEXT: [[AND:%.*]] = and i8 [[DEC]], [[NOT]] -; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) i8 @llvm.ctlz.i8(i8 [[AND]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) i8 @llvm.cttz.i8(i8 [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw i8 8, [[TMP1]] ; CHECK-NEXT: ret i8 [[CLZ]] ; %dec = add i8 %a0, -1 @@ -141,10 +133,8 @@ define i8 @ctlz_to_sub_bw_cttz_commute_and(i8 %a0) { define <2 x i8> @ctlz_to_sub_bw_cttz_vec_splat(<2 x i8> %a0) { ; CHECK-LABEL: define <2 x i8> @ctlz_to_sub_bw_cttz_vec_splat( ; CHECK-SAME: <2 x i8> [[A0:%.*]]) { -; CHECK-NEXT: [[DEC:%.*]] = add <2 x i8> [[A0]], splat (i8 -1) -; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i8> [[A0]], splat (i8 -1) -; CHECK-NEXT: [[AND:%.*]] = and <2 x i8> [[DEC]], [[NOT]] -; CHECK-NEXT: [[CLZ:%.*]] = tail call range(i8 0, 9) <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[AND]], i1 false) +; CHECK-NEXT: [[TMP1:%.*]] = call range(i8 0, 9) <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[A0]], i1 false) +; CHECK-NEXT: [[CLZ:%.*]] = sub nuw nsw <2 x i8> splat (i8 8), [[TMP1]] ; CHECK-NEXT: ret <2 x i8> [[CLZ]] ; %dec = add <2 x i8> %a0, <i8 -1, i8 -1>