
Conversation

@egorshamshura
Contributor

@egorshamshura egorshamshura requested a review from nikic as a code owner November 7, 2025 13:15
@llvmbot llvmbot added the llvm:instcombine and llvm:transforms labels Nov 7, 2025
@llvmbot
Member

llvmbot commented Nov 7, 2025

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-backend-risc-v
@llvm/pr-subscribers-backend-systemz
@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-coroutines
@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Shamshura Egor (egorshamshura)

Changes

Fixes: #152797

alive2: https://alive2.llvm.org/ce/z/h8HYTo
godbolt: https://godbolt.org/z/Mqzs9W8q4


Full diff: https://github.com/llvm/llvm-project/pull/166935.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (+13)
  • (added) llvm/test/Transforms/InstCombine/redundant-add-in-and.ll (+60)
  • (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+21-9)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index cbaff294819a2..cf009c4647d94 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2470,6 +2470,19 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
     return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
   }
 
+  // (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0
+  if (match(Op0, m_Add(m_Value(X), m_Value(Y)))) {
+    const APInt *PowerC;
+    if (match(Op1, m_Power2(PowerC)) && !PowerC->isOne()) {
+      KnownBits YKnown = computeKnownBits(Y, &I);
+
+      APInt YMod = YKnown.Zero;
+      if (YMod.countTrailingZeros() > PowerC->logBase2() + 1) {
+        return BinaryOperator::CreateAnd(X, Op1);
+      }
+    }
+  }
+
   // Canonicalize:
   // (X +/- Y) & Y --> ~X & Y when Y is a power of 2.
   if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr(
diff --git a/llvm/test/Transforms/InstCombine/redundant-add-in-and.ll b/llvm/test/Transforms/InstCombine/redundant-add-in-and.ll
new file mode 100644
index 0000000000000..85be06d769fcc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/redundant-add-in-and.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @addition_and_bitwise1(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise1(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[V0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[V1]], 2
+; CHECK-NEXT:    [[V6:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[V6]]
+;
+  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+  %v1 = load i32, ptr %v0, align 4
+  %v2 = zext i32 %v1 to i64
+  %v3 = ptrtoint ptr %v0 to i64
+  %v4 = add i64 %v2, %v3
+  %v5 = and i64 %v4, 2
+  %v6 = icmp eq i64 %v5, 0
+  ret i1 %v6
+}
+
+define i1 @addition_and_bitwise2(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise2(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[V0]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[V1]], 4
+; CHECK-NEXT:    [[V6:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[V6]]
+;
+  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+  %v1 = load i32, ptr %v0, align 16
+  %v2 = zext i32 %v1 to i64
+  %v3 = ptrtoint ptr %v0 to i64
+  %v4 = add i64 %v2, %v3
+  %v5 = and i64 %v4, 4
+  %v6 = icmp eq i64 %v5, 0
+  ret i1 %v6
+}
+
+define i1 @addition_and_bitwise3(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise3(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[V3:%.*]] = ptrtoint ptr [[V0]] to i64
+; CHECK-NEXT:    [[V5:%.*]] = and i64 [[V3]], 4
+; CHECK-NEXT:    [[V6:%.*]] = icmp eq i64 [[V5]], 0
+; CHECK-NEXT:    ret i1 [[V6]]
+;
+  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+  %v1 = load i32, ptr %v0, align 16
+  %v2 = zext i32 %v1 to i64
+  %v3 = ptrtoint ptr %v0 to i64
+  %v4 = add i64 %v3, %v2
+  %v5 = and i64 %v4, 4
+  %v6 = icmp eq i64 %v5, 0
+  ret i1 %v6
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 66e4de5da7955..04f0cf78b4081 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -4274,10 +4274,14 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
 ; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; IND:       vector.scevcheck:
-; IND-NEXT:    [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; IND-NEXT:    br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; IND-NEXT:    [[TMP5:%.*]] = and i64 [[K]], 2147483648
+; IND-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; IND-NEXT:    [[TMP7:%.*]] = add i64 [[K]], -4294967297
+; IND-NEXT:    [[TMP8:%.*]] = icmp ult i64 [[TMP7]], -4294967296
+; IND-NEXT:    [[TMP4:%.*]] = or i1 [[TMP6]], [[TMP8]]
+; IND-NEXT:    br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; IND:       vector.ph:
-; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 4294967294
+; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 6442450942
 ; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; IND:       vector.body:
 ; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -4314,10 +4318,14 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
 ; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; UNROLL:       vector.scevcheck:
-; UNROLL-NEXT:    [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; UNROLL-NEXT:    br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; UNROLL-NEXT:    [[TMP5:%.*]] = and i64 [[K]], 2147483648
+; UNROLL-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; UNROLL-NEXT:    [[TMP7:%.*]] = add i64 [[K]], -4294967297
+; UNROLL-NEXT:    [[TMP8:%.*]] = icmp ult i64 [[TMP7]], -4294967296
+; UNROLL-NEXT:    [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
+; UNROLL-NEXT:    br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; UNROLL:       vector.ph:
-; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 4294967292
+; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 6442450940
 ; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -4402,10 +4410,14 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8
 ; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; INTERLEAVE:       vector.scevcheck:
-; INTERLEAVE-NEXT:    [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; INTERLEAVE-NEXT:    br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; INTERLEAVE-NEXT:    [[TMP5:%.*]] = and i64 [[K]], 2147483648
+; INTERLEAVE-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; INTERLEAVE-NEXT:    [[TMP7:%.*]] = add i64 [[K]], -4294967297
+; INTERLEAVE-NEXT:    [[TMP8:%.*]] = icmp ult i64 [[TMP7]], -4294967296
+; INTERLEAVE-NEXT:    [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
+; INTERLEAVE-NEXT:    br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE:       vector.ph:
-; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 4294967288
+; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 6442450936
 ; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

define i1 @addition_and_bitwise1(ptr %0) {
Member

Don't include unrelated instructions.
You can use y = arg & (2^(C+1) - 1) to make sure the low bits of y are zeros.
Here is an example:

define i32 @test(i32 %x, i32 %y) {
  %y_masked = and i32 %y, -4
  %add = add i32 %x, %y_masked
  %and = and i32 %add, 2
  ret i32 %and
}

Contributor Author

The current version of InstCombine already simplifies this well:

define i32 @test(i32 %x, i32 %y) {
  %and = and i32 %x, 2
  ret i32 %and
}

However, InstCombine does not simplify the variant with ptrtoint and an aligned load:

define i1 @addition_and_bitwise3(ptr %0) {
; CHECK-LABEL: define i1 @addition_and_bitwise3(
; CHECK-SAME: ptr [[TMP0:%.*]]) {
; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
; CHECK-NEXT:    [[V3:%.*]] = ptrtoint ptr [[V0]] to i64
; CHECK-NEXT:    [[V5:%.*]] = and i64 [[V3]], 4
; CHECK-NEXT:    [[V6:%.*]] = icmp eq i64 [[V5]], 0
; CHECK-NEXT:    ret i1 [[V6]]
;
  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
  %v1 = load i32, ptr %v0, align 16
  %v2 = zext i32 %v1 to i64
  %v3 = ptrtoint ptr %v0 to i64
  %v4 = add i64 %v3, %v2
  %v5 = and i64 %v4, 4
  %v6 = icmp eq i64 %v5, 0
  ret i1 %v6
}

I could not find any code in InstCombinerImpl::SimplifyDemandedUseBits that uses the alignment information from a load instruction.

Am I right? Should I add a case Instruction::Load that computes known bits from the load's alignment?

Contributor Author

@egorshamshura egorshamshura Nov 12, 2025

So PtrToInt tries to computeKnownBits:

computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);

and cannot get any alignment information for %v3. This is the traversal path of computeKnownBits for %v3 = ptrtoint ptr %v0 to i64:

Compute bits for :   %v3 = ptrtoint ptr %v0 to i64
  %v3 = ptrtoint ptr %v0 to i64
Compute bits for :   %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
Compute bits for : ptr %0
left ptr %0
left   %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
left   %v3 = ptrtoint ptr %v0 to i64

So I think we should inspect the users of a pointer to derive KnownBits for it.

Member

Makes sense to me. We can check the users and refine the result here:

// Aligned pointers have trailing zeros - refine Known.Zero set
if (isa<PointerType>(V->getType())) {
  Align Alignment = V->getPointerAlignment(Q.DL);
  Known.Zero.setLowBits(Log2(Alignment));
}

You may need the helper getLoadStoreAlignment. Be careful with the operand index for store users, as the pointer value can also appear as the stored value rather than the pointer operand.
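
For illustration, a minimal sketch of that user scan, assuming it runs inside computeKnownBits where V and Known are in scope (the context/dominance guards discussed below are omitted):

for (const User *U : V->users()) {
  if (const auto *LI = dyn_cast<LoadInst>(U)) {
    // A load through V implies V is at least as aligned as the load.
    Known.Zero.setLowBits(Log2(LI->getAlign()));
  } else if (const auto *SI = dyn_cast<StoreInst>(U)) {
    // Only stores *through* V count; V may instead be the stored value.
    if (SI->getPointerOperand() == V)
      Known.Zero.setLowBits(Log2(SI->getAlign()));
  }
}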

Contributor Author

@egorshamshura egorshamshura Nov 12, 2025

Thanks! I also found this place. However, I have run into a problem:

LLVM ERROR: Instruction Combining on _Z3onev did not reach a fixpoint after 1 iterations. Use 'instcombine<no-verify-fixpoint>' or function attribute 'instcombine-no-verify-fixpoint' to suppress this error.

in the DebugInfo/Generic/instcombine-replaced-select-with-operand.ll test. Can you have a look?

Member

> Thanks! I also found this place. However I have got a problem with: LLVM ERROR: Instruction Combining on _Z3onev did not reach a fixpoint after 1 iterations. Use 'instcombine<no-verify-fixpoint>' or function attribute 'instcombine-no-verify-fixpoint' to suppress this error. in DebugInfo/Generic/instcombine-replaced-select-with-operand.ll test. Can you have a look?

Use -passes=instcombine<no-verify-fixpoint;max-iterations=10> -debug-only=instcombine and see if there are some patterns that get folded after the first iteration ends.
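
For example, a full invocation might look like this (illustrative; the test path is the one mentioned above, and -debug-only requires an assertions-enabled build):

opt -passes='instcombine<no-verify-fixpoint;max-iterations=10>' \
    -debug-only=instcombine -disable-output \
    llvm/test/DebugInfo/Generic/instcombine-replaced-select-with-operand.ll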

> Also, I don't have a way to update the tests for other targets. Can you help me, please?

Are they llc tests? If so, use update_llc_test_checks.py to regenerate check lines.

Contributor Author

@egorshamshura egorshamshura Nov 12, 2025

> Use -passes=instcombine<no-verify-fixpoint;max-iterations=10> -debug-only=instcombine and see if there are some patterns that get folded after the first iteration ends.

I don't want to attach the huge output here, so I posted it in a GitHub gist: https://gist.github.com/egorshamshura/f8a3d117bb01fa1fea8e0f7ee4f0d4af

As far as I can see, there are patterns like and(%1, 1) where %1 is an aligned pointer.

> Are they llc tests? If so, use update_llc_test_checks.py to regenerate check lines.

Thanks! Updated them

Contributor Author

So the problem is here:

define dso_local void @_Z3onev() local_unnamed_addr !dbg !29 {
entry:
  %0 = load ptr, ptr @glob, align 8, !dbg !35
  call void @llvm.dbg.value(metadata ptr %0, metadata !22, metadata !DIExpression()), !dbg !40
  %1 = ptrtoint ptr %0 to i64, !dbg !42
  %and.i = and i64 %1, 1, !dbg !43
  %tobool.not.i = icmp eq i64 %and.i, 0, !dbg !42
  %2 = bitcast ptr %0 to ptr, !dbg !44
  %retval.0.i = select i1 %tobool.not.i, ptr %2, ptr null, !dbg !44
  call void @llvm.dbg.value(metadata ptr %retval.0.i, metadata !33, metadata !DIExpression()), !dbg !45
  %tobool.not = icmp eq ptr %retval.0.i, null, !dbg !46
  br i1 %tobool.not, label %if.end, label %if.then, !dbg !47

if.then:                                          ; preds = %entry
  call void @llvm.dbg.value(metadata ptr %retval.0.i, metadata !48, metadata !DIExpression()), !dbg !51
  %ptr.i = getelementptr inbounds %struct.Thing, ptr %retval.0.i, i64 0, i32 0, !dbg !53
  %3 = load ptr, ptr %ptr.i, align 8, !dbg !53
  store ptr %3, ptr @glob, align 8, !dbg !56
  br label %if.end, !dbg !57

if.end:                                           ; preds = %if.then, %entry
  ret void, !dbg !58
}

As you can see, the user of %0 = load ptr, ptr @glob, align 8 is %2 = bitcast ptr %0 to ptr, then %retval.0.i = select i1 %tobool.not.i, ptr %2, ptr null, and after that:

%ptr.i = getelementptr inbounds %struct.Thing, ptr %retval.0.i, i64 0, i32 0, !dbg !53
%3 = load ptr, ptr %ptr.i, align 8, !dbg !53

In this case, we cannot fold in the first iteration because the aligned load is not a direct user of %0. We need to obtain the alignment information recursively from %0 through its users (%0 <- %2 <- %retval.0.i <- %ptr.i).

How do I implement this correctly? Do I need to write a separate recursive traversal function?

Member

You can recheck this after addressing my comment #166935 (comment). In this case we cannot infer bits of %0 since %3 = load ptr, ptr %ptr.i, align 8 doesn't post-dominate %0 = load ptr, ptr @glob, align 8.

Contributor Author

Thanks! Added isValidAssumeForContext

return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
}

// (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0
Member

It reminds me that we already support this pattern in:

// If we are known to be adding zeros to every bit below
// the highest demanded bit, we just return the other side.
if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
  return I->getOperand(0);
if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
  return I->getOperand(1);

Can you dig out the reason why SimplifyDemandedBits fails to eliminate the add? Does it reach the max depth limit? Or does some context information get lost, so that we cannot use load ... align 4 to infer the alignment?

@llvmbot llvmbot added the coroutines and llvm:analysis labels Nov 12, 2025
@github-actions

github-actions bot commented Nov 12, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Known.Zero.setLowBits(Log2(Alignment));
for (auto *User : V->users()) {
  if (auto *Load = dyn_cast<LoadInst>(User)) {
    Known.Zero.setLowBits(Log2(Load->getAlign()));
Member

Check isValidAssumeForContext after ensuring the known bits can be improved. This function is a bit expensive.

Contributor Author

@egorshamshura egorshamshura Nov 13, 2025

How do I use this function properly? I have tried to use it in different ways, for example like this:

if (isa<PointerType>(V->getType())) {
    Align Alignment = V->getPointerAlignment(Q.DL);
    Known.Zero.setLowBits(Log2(Alignment));
    for (auto *User : V->users()) {
      if (auto *Load = dyn_cast<LoadInst>(User)) {
        if (Q.CxtI && !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
          continue;
        }
        Known.Zero.setBits(0, Log2(Load->getAlign()));
      } else if (auto *Store = dyn_cast<StoreInst>(User)) {
        if (Store->getPointerOperand() != V || (Q.CxtI && !isValidAssumeForContext(Store, Q.CxtI, Q.DT))) {
          continue;
        }
        Known.Zero.setBits(0, Log2(Store->getAlign()));
      }
    }
  }

But it fails with:

opt: /root/repos/llvm/llvm-project/llvm/include/llvm/Support/GenericDomTree.h:403: DomTreeNodeBase<NodeT> *llvm::DominatorTreeBase<llvm::BasicBlock, false>::getNode(const NodeT *) const [NodeT = llvm::BasicBlock, IsPostDom = false]: Assertion `(!BB || Parent == NodeTrait::getParent(const_cast<NodeT *>(BB))) && "cannot get DomTreeNode of block with different parent"' failed.

I have also tried getting the user's basic block and checking whether UserBB == VBB, and it worked well; however, I think we need to use isValidAssumeForContext.

Contributor

I guess this means that Load and Q.CxtI are in different Functions. Maybe you need an explicit check for that before calling isValidAssumeForContext.

Contributor Author

Thanks!

Member

> I guess this means that Load and Q.CxtI are in different Functions. Maybe you need an explicit check for that before calling isValidAssumeForContext.

V may be a global variable and used by multiple functions.


Known.Zero.setLowBits(Log2(Load->getAlign()));
}
if (auto *Store = dyn_cast<StoreInst>(User)) {
  if (Store->getOperand(1) != V) {
Contributor

Suggested change:
- if (Store->getOperand(1) != V) {
+ if (Store->getPointerOperand() != V) {

Contributor Author

Thanks, fixed

@dtcxzyw
Member

dtcxzyw commented Nov 13, 2025

The PR title and description need to be updated.

@egorshamshura egorshamshura changed the title [InstCombine] Fold (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0 [Analysis] Enhance alignment propagation in computeKnownBits. Nov 13, 2025
Comment on lines 2379 to 2380
if (Q.CxtI && SameFunction(Load, Q.CxtI) &&
    !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
Contributor

Should this be:

Suggested change:
- if (Q.CxtI && SameFunction(Load, Q.CxtI) &&
-     !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
+ if (Q.CxtI && (!SameFunction(Load, Q.CxtI) ||
+                !isValidAssumeForContext(Load, Q.CxtI, Q.DT))) {

?

Contributor Author

maybe (!Q.CxtI || !SameFunction(Load, Q.CxtI) || !isValidAssumeForContext(Load, Q.CxtI, Q.DT))?

@llvmbot llvmbot added the clang label Nov 13, 2025
template <typename T> class ArrayRef;

- constexpr unsigned MaxAnalysisRecursionDepth = 6;
+ constexpr unsigned MaxAnalysisRecursionDepth = 20;
Member

?

Contributor Author

oh, sorry, accidentally added

Contributor Author

I was also trying to figure out why the test was failing... :( Thanks

if (isa<PointerType>(V->getType())) {
  Align Alignment = V->getPointerAlignment(Q.DL);
  Known.Zero.setLowBits(Log2(Alignment));
  for (auto *User : V->users()) {
    if (auto *Load = dyn_cast<LoadInst>(User)) {
      if (!Q.CxtI || !SameFunction(Load, Q.CxtI) ||
Contributor

I don't understand this version -- if this is correct then you could wrap the entire loop in if (Q.CxtI).

What is the desired behaviour when Q.CxtI is nullptr?

Member

We can do nothing without CxtI.

Member

@dtcxzyw dtcxzyw left a comment

Please don't update tests too early. If the compile-time impact is high, we have to switch to a more conservative way (e.g., only checking one user).

if (isa<PointerType>(V->getType())) {
  Align Alignment = V->getPointerAlignment(Q.DL);
  Known.Zero.setLowBits(Log2(Alignment));
  for (auto *User : V->users()) {
Member

Suggested change:
- for (auto *User : V->users()) {
+ if (Q.CxtI) {
+   Function *CxtFunc = Q.CxtI->getFunction();
+   for (auto *User : V->users()) {

When CxtI is unavailable, we don't need to iterate the user list.

Contributor Author

Thanks, fixed

Known.Zero.setLowBits(Log2(Alignment));
for (auto *User : V->users()) {
  if (auto *Load = dyn_cast<LoadInst>(User)) {
    if (!Q.CxtI || !SameFunction(Load, Q.CxtI) ||
Member

Suggested change:
- if (!Q.CxtI || !SameFunction(Load, Q.CxtI) ||
+ if (Load->getFunction() != CxtFunc ||

Contributor Author

Fixed

for (auto *User : V->users()) {
  if (auto *Load = dyn_cast<LoadInst>(User)) {
    if (!Q.CxtI || !SameFunction(Load, Q.CxtI) ||
        !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
Member

As I mentioned earlier, only check this after ensuring that the result can be improved.

Suggested change:
- !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
+ Known.countMinTrailingZeros() >= Log2(Load->getAlign()) ||
+     !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {

We need a lambda helper function to avoid duplicating this for stores.
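
For illustration, one possible shape for that helper, assuming CxtFunc from the earlier suggestion is in scope (names and exact guards are placeholders, not the final patch):

auto RefineWithAlign = [&](Instruction *UserI, Align Alignment) {
  // Skip the expensive dominance query when no new bits can be gained.
  if (Known.countMinTrailingZeros() >= Log2(Alignment))
    return;
  if (UserI->getFunction() == CxtFunc &&
      isValidAssumeForContext(UserI, Q.CxtI, Q.DT))
    Known.Zero.setLowBits(Log2(Alignment));
};

It would be called as RefineWithAlign(Load, Load->getAlign()) for loads and, after the pointer-operand check, as RefineWithAlign(Store, Store->getAlign()) for stores.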

Contributor Author

Fixed, decided to use getLoadStorePointerOperand and getLoadStoreAlignment.

@dtcxzyw dtcxzyw changed the title [Analysis] Enhance alignment propagation in computeKnownBits. [ValueTracking] Enhance alignment propagation in computeKnownBits. Nov 13, 2025
@nikic
Contributor

nikic commented Nov 13, 2025

I don't think we should be doing this in ValueTracking. Can we instead extend InferAlignment to also "infer" alignment for the and of ptrtoint pattern?

Contributor

We shouldn't switch tests to generated checks in a functional PR.

Contributor Author

sorry, fixed

@egorshamshura
Contributor Author

I have now added a version that handles the alignment information in ValueTracking. Let me know if you decide it should be handled in InferAlignment instead.

Comment on lines 2378 to 2380
return Known.countMinTrailingZeros() >=
           Log2(getLoadStoreAlignment(Instruction)) ||
       Instruction->getFunction() != CxtFunc ||
Contributor

I don't see why you need the first condition.

Suggested change:
- return Known.countMinTrailingZeros() >=
-            Log2(getLoadStoreAlignment(Instruction)) ||
-        Instruction->getFunction() != CxtFunc ||
+ return Instruction->getFunction() != CxtFunc ||

Member

See my previous comment #166935 (comment).

Contributor

Ah OK, but putting that check in a function called InvalidUser is pretty misleading! In fact I don't think having InvalidUser is helping at all and it should just be inlined.

Member

Yeah. It deserves a better name. I am not sure if we will leverage the align attributes on the pointer parameters of memory intrinsics in the future. I'd adjust the lambda to something like auto RefineKnownBitsWithAlign = [](Instruction *I, Align alignment).

Member

Ah, I see the branches for load/store are merged into one if statement with getLoadStorePointerOperand. It is ok to inline the logic for now.

Contributor Author

Done

@dtcxzyw
Member

dtcxzyw commented Nov 14, 2025

This patch has a significant compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=3d4156700ef2ea678c3dabf796bc8622ac0d7ecb&to=3ab8d44570e1dd7d7010b3bce624dc3f26141cb7&stat=instructions:u We should limit the number of uses to inspect (e.g., <= DomConditionsMaxUses).
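
A conservative sketch of such a cap, assuming the existing DomConditionsMaxUses option in ValueTracking.cpp is reused (illustrative only):

unsigned NumUsesExplored = 0;
for (const User *U : V->users()) {
  // Bound compile time: give up on very long use lists.
  if (++NumUsesExplored > DomConditionsMaxUses)
    break;
  // ... inspect load/store users as above ...
}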

If it is still expensive, we may move the logic into InferAlignment and only handle some specific patterns.

@nikic
Contributor

nikic commented Nov 17, 2025

I think that making the limit low enough to avoid the compile time impact will make this optimization quite unreliable. While any given load/store user will likely have the same alignment, it's likely that the one inspected user will either be a GEP or not dominate. And of course, the picked user depends on the use-list order.

Doing this in InferAlignment should be more reliable, with the downside that it runs late in the pipeline and other computeKnownBits users can't benefit.


Labels

backend:AMDGPU, backend:PowerPC, backend:RISC-V, backend:SystemZ, clang, coroutines, llvm:analysis, llvm:globalisel, llvm:instcombine, llvm:transforms
