
Conversation

@egorshamshura
Contributor

@egorshamshura egorshamshura requested a review from nikic as a code owner November 7, 2025 13:15
@llvmbot llvmbot added the llvm:instcombine and llvm:transforms labels Nov 7, 2025
@llvmbot
Member

llvmbot commented Nov 7, 2025

@llvm/pr-subscribers-clang
@llvm/pr-subscribers-backend-risc-v
@llvm/pr-subscribers-backend-systemz
@llvm/pr-subscribers-llvm-globalisel
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-coroutines
@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Shamshura Egor (egorshamshura)

Changes

Fixes: #152797

alive2: https://alive2.llvm.org/ce/z/h8HYTo
godbolt: https://godbolt.org/z/Mqzs9W8q4


Full diff: https://github.com/llvm/llvm-project/pull/166935.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (+13)
  • (added) llvm/test/Transforms/InstCombine/redundant-add-in-and.ll (+60)
  • (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+21-9)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index cbaff294819a2..cf009c4647d94 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2470,6 +2470,19 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
     return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
   }
 
+  // (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0
+  if (match(Op0, m_Add(m_Value(X), m_Value(Y)))) {
+    const APInt *PowerC;
+    if (match(Op1, m_Power2(PowerC)) && !PowerC->isOne()) {
+      KnownBits YKnown = computeKnownBits(Y, &I);
+
+      APInt YMod = YKnown.Zero;
+      if (YMod.countTrailingZeros() > PowerC->logBase2() + 1) {
+        return BinaryOperator::CreateAnd(X, Op1);
+      }
+    }
+  }
+
   // Canonicalize:
   // (X +/- Y) & Y --> ~X & Y when Y is a power of 2.
   if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr(
diff --git a/llvm/test/Transforms/InstCombine/redundant-add-in-and.ll b/llvm/test/Transforms/InstCombine/redundant-add-in-and.ll
new file mode 100644
index 0000000000000..85be06d769fcc
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/redundant-add-in-and.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @addition_and_bitwise1(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise1(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[V0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[V1]], 2
+; CHECK-NEXT:    [[V6:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[V6]]
+;
+  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+  %v1 = load i32, ptr %v0, align 4
+  %v2 = zext i32 %v1 to i64
+  %v3 = ptrtoint ptr %v0 to i64
+  %v4 = add i64 %v2, %v3
+  %v5 = and i64 %v4, 2
+  %v6 = icmp eq i64 %v5, 0
+  ret i1 %v6
+}
+
+define i1 @addition_and_bitwise2(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise2(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[V0]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[V1]], 4
+; CHECK-NEXT:    [[V6:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[V6]]
+;
+  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+  %v1 = load i32, ptr %v0, align 16
+  %v2 = zext i32 %v1 to i64
+  %v3 = ptrtoint ptr %v0 to i64
+  %v4 = add i64 %v2, %v3
+  %v5 = and i64 %v4, 4
+  %v6 = icmp eq i64 %v5, 0
+  ret i1 %v6
+}
+
+define i1 @addition_and_bitwise3(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise3(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[V3:%.*]] = ptrtoint ptr [[V0]] to i64
+; CHECK-NEXT:    [[V5:%.*]] = and i64 [[V3]], 4
+; CHECK-NEXT:    [[V6:%.*]] = icmp eq i64 [[V5]], 0
+; CHECK-NEXT:    ret i1 [[V6]]
+;
+  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+  %v1 = load i32, ptr %v0, align 16
+  %v2 = zext i32 %v1 to i64
+  %v3 = ptrtoint ptr %v0 to i64
+  %v4 = add i64 %v3, %v2
+  %v5 = and i64 %v4, 4
+  %v6 = icmp eq i64 %v5, 0
+  ret i1 %v6
+}
+
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index 66e4de5da7955..04f0cf78b4081 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -4274,10 +4274,14 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
 ; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; IND:       vector.scevcheck:
-; IND-NEXT:    [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; IND-NEXT:    br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; IND-NEXT:    [[TMP5:%.*]] = and i64 [[K]], 2147483648
+; IND-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; IND-NEXT:    [[TMP7:%.*]] = add i64 [[K]], -4294967297
+; IND-NEXT:    [[TMP8:%.*]] = icmp ult i64 [[TMP7]], -4294967296
+; IND-NEXT:    [[TMP4:%.*]] = or i1 [[TMP6]], [[TMP8]]
+; IND-NEXT:    br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; IND:       vector.ph:
-; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 4294967294
+; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 6442450942
 ; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; IND:       vector.body:
 ; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -4314,10 +4318,14 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
 ; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; UNROLL:       vector.scevcheck:
-; UNROLL-NEXT:    [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; UNROLL-NEXT:    br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; UNROLL-NEXT:    [[TMP5:%.*]] = and i64 [[K]], 2147483648
+; UNROLL-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; UNROLL-NEXT:    [[TMP7:%.*]] = add i64 [[K]], -4294967297
+; UNROLL-NEXT:    [[TMP8:%.*]] = icmp ult i64 [[TMP7]], -4294967296
+; UNROLL-NEXT:    [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
+; UNROLL-NEXT:    br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; UNROLL:       vector.ph:
-; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 4294967292
+; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 6442450940
 ; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; UNROLL:       vector.body:
 ; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -4402,10 +4410,14 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
 ; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8
 ; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
 ; INTERLEAVE:       vector.scevcheck:
-; INTERLEAVE-NEXT:    [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; INTERLEAVE-NEXT:    br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; INTERLEAVE-NEXT:    [[TMP5:%.*]] = and i64 [[K]], 2147483648
+; INTERLEAVE-NEXT:    [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; INTERLEAVE-NEXT:    [[TMP7:%.*]] = add i64 [[K]], -4294967297
+; INTERLEAVE-NEXT:    [[TMP8:%.*]] = icmp ult i64 [[TMP7]], -4294967296
+; INTERLEAVE-NEXT:    [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
+; INTERLEAVE-NEXT:    br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; INTERLEAVE:       vector.ph:
-; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 4294967288
+; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], 6442450936
 ; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; INTERLEAVE:       vector.body:
 ; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

define i1 @addition_and_bitwise1(ptr %0) {
Member

Don't include unrelated instructions.
You can use y = arg & (2^(C+1) - 1) to make sure the low bits of y are zeros.
Here is an example:

define i32 @test(i32 %x, i32 %y) {
  %y_masked = and i32 %y, -4
  %add = add i32 %x, %y_masked
  %and = and i32 %add, 2
  ret i32 %and
}

Contributor Author

The current version of InstCombine already simplifies this well:

define i32 @test(i32 %x, i32 %y) {
  %and = and i32 %x, 2
  ret i32 %and
}

However, InstCombine does not simplify the variant with ptrtoint and an aligned load:

define i1 @addition_and_bitwise3(ptr %0) {
; CHECK-LABEL: define i1 @addition_and_bitwise3(
; CHECK-SAME: ptr [[TMP0:%.*]]) {
; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
; CHECK-NEXT:    [[V3:%.*]] = ptrtoint ptr [[V0]] to i64
; CHECK-NEXT:    [[V5:%.*]] = and i64 [[V3]], 4
; CHECK-NEXT:    [[V6:%.*]] = icmp eq i64 [[V5]], 0
; CHECK-NEXT:    ret i1 [[V6]]
;
  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
  %v1 = load i32, ptr %v0, align 16
  %v2 = zext i32 %v1 to i64
  %v3 = ptrtoint ptr %v0 to i64
  %v4 = add i64 %v3, %v2
  %v5 = and i64 %v4, 4
  %v6 = icmp eq i64 %v5, 0
  ret i1 %v6
}

I could not find any code in InstCombinerImpl::SimplifyDemandedUseBits that uses the alignment information from a load instruction.

Am I right? Should I add a case Instruction::Load that computes known bits from the load's alignment?

Contributor Author

@egorshamshura egorshamshura Nov 12, 2025

So PtrToInt tries to computeKnownBits:

computeKnownBits(I->getOperand(0), DemandedElts, Known, Q, Depth + 1);

and cannot get any alignment information for %v3. This is the traversal path of computeKnownBits for %v3 = ptrtoint ptr %v0 to i64:

Compute bits for :   %v3 = ptrtoint ptr %v0 to i64
  %v3 = ptrtoint ptr %v0 to i64
Compute bits for :   %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
Compute bits for : ptr %0
left ptr %0
left   %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
left   %v3 = ptrtoint ptr %v0 to i64

So I think we should inspect the users of a pointer to derive KnownBits for it.

Member

Makes sense to me. We can check the users and refine the result here:

// Aligned pointers have trailing zeros - refine Known.Zero set
if (isa<PointerType>(V->getType())) {
  Align Alignment = V->getPointerAlignment(Q.DL);
  Known.Zero.setLowBits(Log2(Alignment));
}

You may need the helper getLoadStoreAlignment. Be careful with the operand index for store users, as the pointer value can also appear as the stored value rather than the pointer operand.
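
For illustration, a minimal sketch of that user scan, assuming it runs inside computeKnownBits where V and Known are in scope (the context/dominance guards discussed below are omitted):

for (const User *U : V->users()) {
  if (const auto *LI = dyn_cast<LoadInst>(U)) {
    // A load through V implies V is at least as aligned as the load.
    Known.Zero.setLowBits(Log2(LI->getAlign()));
  } else if (const auto *SI = dyn_cast<StoreInst>(U)) {
    // Only stores *through* V count; V may instead be the stored value.
    if (SI->getPointerOperand() == V)
      Known.Zero.setLowBits(Log2(SI->getAlign()));
  }
}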

Contributor Author

@egorshamshura egorshamshura Nov 12, 2025

Thanks! I also found this place. However, I have run into a problem:

LLVM ERROR: Instruction Combining on _Z3onev did not reach a fixpoint after 1 iterations. Use 'instcombine<no-verify-fixpoint>' or function attribute 'instcombine-no-verify-fixpoint' to suppress this error.

in the DebugInfo/Generic/instcombine-replaced-select-with-operand.ll test. Can you have a look?

Member

> Thanks! I also found this place. However I have got a problem with: LLVM ERROR: Instruction Combining on _Z3onev did not reach a fixpoint after 1 iterations. Use 'instcombine<no-verify-fixpoint>' or function attribute 'instcombine-no-verify-fixpoint' to suppress this error. in DebugInfo/Generic/instcombine-replaced-select-with-operand.ll test. Can you have a look?

Use -passes=instcombine<no-verify-fixpoint;max-iterations=10> -debug-only=instcombine and see if there are some patterns that get folded after the first iteration ends.
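
For example, a full invocation might look like this (illustrative; the test path is the one mentioned above, and -debug-only requires an assertions-enabled build):

opt -passes='instcombine<no-verify-fixpoint;max-iterations=10>' \
    -debug-only=instcombine -disable-output \
    llvm/test/DebugInfo/Generic/instcombine-replaced-select-with-operand.ll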

> Also, I don't have a way to update the tests for other targets. Can you help me, please?

Are they llc tests? If so, use update_llc_test_checks.py to regenerate check lines.

Contributor Author

@egorshamshura egorshamshura Nov 12, 2025

> Use -passes=instcombine<no-verify-fixpoint;max-iterations=10> -debug-only=instcombine and see if there are some patterns that get folded after the first iteration ends.

I don't want to attach the huge output here, so I posted it in a GitHub gist: https://gist.github.com/egorshamshura/f8a3d117bb01fa1fea8e0f7ee4f0d4af

As far as I can see, there are patterns like and(%1, 1) where %1 is an aligned pointer.

> Are they llc tests? If so, use update_llc_test_checks.py to regenerate check lines.

Thanks! Updated them

Contributor Author

So the problem is here:

define dso_local void @_Z3onev() local_unnamed_addr !dbg !29 {
entry:
  %0 = load ptr, ptr @glob, align 8, !dbg !35
  call void @llvm.dbg.value(metadata ptr %0, metadata !22, metadata !DIExpression()), !dbg !40
  %1 = ptrtoint ptr %0 to i64, !dbg !42
  %and.i = and i64 %1, 1, !dbg !43
  %tobool.not.i = icmp eq i64 %and.i, 0, !dbg !42
  %2 = bitcast ptr %0 to ptr, !dbg !44
  %retval.0.i = select i1 %tobool.not.i, ptr %2, ptr null, !dbg !44
  call void @llvm.dbg.value(metadata ptr %retval.0.i, metadata !33, metadata !DIExpression()), !dbg !45
  %tobool.not = icmp eq ptr %retval.0.i, null, !dbg !46
  br i1 %tobool.not, label %if.end, label %if.then, !dbg !47

if.then:                                          ; preds = %entry
  call void @llvm.dbg.value(metadata ptr %retval.0.i, metadata !48, metadata !DIExpression()), !dbg !51
  %ptr.i = getelementptr inbounds %struct.Thing, ptr %retval.0.i, i64 0, i32 0, !dbg !53
  %3 = load ptr, ptr %ptr.i, align 8, !dbg !53
  store ptr %3, ptr @glob, align 8, !dbg !56
  br label %if.end, !dbg !57

if.end:                                           ; preds = %if.then, %entry
  ret void, !dbg !58
}

As you can see, the user of %0 = load ptr, ptr @glob, align 8 is %2 = bitcast ptr %0 to ptr, then %retval.0.i = select i1 %tobool.not.i, ptr %2, ptr null, and after that:

%ptr.i = getelementptr inbounds %struct.Thing, ptr %retval.0.i, i64 0, i32 0, !dbg !53
%3 = load ptr, ptr %ptr.i, align 8, !dbg !53

In this case, we cannot fold in the first iteration because the aligned load is not a direct user of %0. We need to obtain the alignment information recursively from %0 through its users (%0 <- %2 <- %retval.0.i <- %ptr.i).

How do I implement this correctly? Do I need to write a separate recursive traversal function?

Member

You can recheck this after addressing my comment #166935 (comment). In this case we cannot infer bits of %0 since %3 = load ptr, ptr %ptr.i, align 8 doesn't post-dominate %0 = load ptr, ptr @glob, align 8.

Contributor Author

Thanks! Added isValidAssumeForContext

return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
}

// (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0
Member

It reminds me that we already support this pattern in:

// If we are known to be adding zeros to every bit below
// the highest demanded bit, we just return the other side.
if (DemandedFromOps.isSubsetOf(RHSKnown.Zero))
  return I->getOperand(0);
if (DemandedFromOps.isSubsetOf(LHSKnown.Zero))
  return I->getOperand(1);

Can you dig out the reason why SimplifyDemandedBits fails to eliminate the add? Does it reach the max depth limit? Or does some context information get lost, so that we cannot use load ... align 4 to infer the alignment?

@llvmbot llvmbot added the coroutines and llvm:analysis labels Nov 12, 2025
@github-actions

github-actions bot commented Nov 12, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Known.Zero.setLowBits(Log2(Alignment));
for (auto *User : V->users()) {
  if (auto *Load = dyn_cast<LoadInst>(User)) {
    Known.Zero.setLowBits(Log2(Load->getAlign()));
Member

Check isValidAssumeForContext after ensuring the known bits can be improved. This function is a bit expensive.

Contributor Author

@egorshamshura egorshamshura Nov 13, 2025

How do I use this function properly? I have tried to use it in different ways, for example like this:

if (isa<PointerType>(V->getType())) {
    Align Alignment = V->getPointerAlignment(Q.DL);
    Known.Zero.setLowBits(Log2(Alignment));
    for (auto *User : V->users()) {
      if (auto *Load = dyn_cast<LoadInst>(User)) {
        if (Q.CxtI && !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
          continue;
        }
        Known.Zero.setBits(0, Log2(Load->getAlign()));
      } else if (auto *Store = dyn_cast<StoreInst>(User)) {
        if (Store->getPointerOperand() != V || (Q.CxtI && !isValidAssumeForContext(Store, Q.CxtI, Q.DT))) {
          continue;
        }
        Known.Zero.setBits(0, Log2(Store->getAlign()));
      }
    }
  }

But it fails with:

opt: /root/repos/llvm/llvm-project/llvm/include/llvm/Support/GenericDomTree.h:403: DomTreeNodeBase<NodeT> *llvm::DominatorTreeBase<llvm::BasicBlock, false>::getNode(const NodeT *) const [NodeT = llvm::BasicBlock, IsPostDom = false]: Assertion `(!BB || Parent == NodeTrait::getParent(const_cast<NodeT *>(BB))) && "cannot get DomTreeNode of block with different parent"' failed.

I have also tried getting the user's basic block and checking whether UserBB == VBB, and it worked well; however, I think we need to use isValidAssumeForContext.

Contributor

I guess this means that Load and Q.CxtI are in different Functions. Maybe you need an explicit check for that before calling isValidAssumeForContext.

Contributor Author

Thanks!

Member

> I guess this means that Load and Q.CxtI are in different Functions. Maybe you need an explicit check for that before calling isValidAssumeForContext.

V may be a global variable and used by multiple functions.


Known.Zero.setLowBits(Log2(Load->getAlign()));
}
if (auto *Store = dyn_cast<StoreInst>(User)) {
  if (Store->getOperand(1) != V) {
Contributor

Suggested change:
- if (Store->getOperand(1) != V) {
+ if (Store->getPointerOperand() != V) {

Contributor Author

Thanks, fixed

@dtcxzyw
Member

dtcxzyw commented Nov 13, 2025

The PR title and description need to be updated.

@egorshamshura egorshamshura changed the title [InstCombine] Fold (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0 [Analysis] Enhance alignment propagation in computeKnownBits. Nov 13, 2025
Comment on lines 2379 to 2380
if (Q.CxtI && SameFunction(Load, Q.CxtI) &&
    !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
Contributor

Should this be:

Suggested change:
- if (Q.CxtI && SameFunction(Load, Q.CxtI) &&
-     !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
+ if (Q.CxtI && (!SameFunction(Load, Q.CxtI) ||
+                !isValidAssumeForContext(Load, Q.CxtI, Q.DT))) {

?

Contributor Author

maybe (!Q.CxtI || !SameFunction(Load, Q.CxtI) || !isValidAssumeForContext(Load, Q.CxtI, Q.DT))?

@llvmbot llvmbot added the clang label Nov 13, 2025
template <typename T> class ArrayRef;

- constexpr unsigned MaxAnalysisRecursionDepth = 6;
+ constexpr unsigned MaxAnalysisRecursionDepth = 20;
Member

?

Contributor Author

oh, sorry, accidentally added

Contributor Author

I was also trying to figure out why the test was failing... :( Thanks

if (isa<PointerType>(V->getType())) {
  Align Alignment = V->getPointerAlignment(Q.DL);
  Known.Zero.setLowBits(Log2(Alignment));
  for (auto *User : V->users()) {
    if (auto *Load = dyn_cast<LoadInst>(User)) {
      if (!Q.CxtI || !SameFunction(Load, Q.CxtI) ||
Contributor

I don't understand this version -- if this is correct then you could wrap the entire loop in if (Q.CxtI).

What is the desired behaviour when Q.CxtI is nullptr?

Member

We can do nothing without CxtI.

Member

@dtcxzyw dtcxzyw left a comment

Please don't update tests too early. If the compile-time impact is high, we have to switch to a more conservative way (e.g., only checking one user).

if (isa<PointerType>(V->getType())) {
  Align Alignment = V->getPointerAlignment(Q.DL);
  Known.Zero.setLowBits(Log2(Alignment));
  for (auto *User : V->users()) {
Member

Suggested change:
- for (auto *User : V->users()) {
+ if (Q.CxtI) {
+   Function *CxtFunc = Q.CxtI->getFunction();
+   for (auto *User : V->users()) {

When CxtI is unavailable, we don't need to iterate the user list.

Contributor Author

Thanks, fixed

Known.Zero.setLowBits(Log2(Alignment));
for (auto *User : V->users()) {
  if (auto *Load = dyn_cast<LoadInst>(User)) {
    if (!Q.CxtI || !SameFunction(Load, Q.CxtI) ||
Member

Suggested change:
- if (!Q.CxtI || !SameFunction(Load, Q.CxtI) ||
+ if (Load->getFunction() != CxtFunc ||

Contributor Author

Fixed

for (auto *User : V->users()) {
  if (auto *Load = dyn_cast<LoadInst>(User)) {
    if (!Q.CxtI || !SameFunction(Load, Q.CxtI) ||
        !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
Member

As I mentioned earlier, only check this after ensuring that the result can be improved.

Suggested change:
- !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {
+ Known.countMinTrailingZeros() >= Log2(Load->getAlign()) ||
+     !isValidAssumeForContext(Load, Q.CxtI, Q.DT)) {

We need a lambda helper function to avoid duplicating this for stores.
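
For illustration, one possible shape for that helper, assuming CxtFunc from the earlier suggestion is in scope (names and exact guards are placeholders, not the final patch):

auto RefineWithAlign = [&](Instruction *UserI, Align Alignment) {
  // Skip the expensive dominance query when no new bits can be gained.
  if (Known.countMinTrailingZeros() >= Log2(Alignment))
    return;
  if (UserI->getFunction() == CxtFunc &&
      isValidAssumeForContext(UserI, Q.CxtI, Q.DT))
    Known.Zero.setLowBits(Log2(Alignment));
};

It would be called as RefineWithAlign(Load, Load->getAlign()) for loads and, after the pointer-operand check, as RefineWithAlign(Store, Store->getAlign()) for stores.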

Contributor Author

Fixed, decided to use getLoadStorePointerOperand and getLoadStoreAlignment.

@dtcxzyw dtcxzyw changed the title [Analysis] Enhance alignment propagation in computeKnownBits. [ValueTracking] Enhance alignment propagation in computeKnownBits. Nov 13, 2025
@nikic
Contributor

nikic commented Nov 13, 2025

I don't think we should be doing this in ValueTracking. Can we instead extend InferAlignment to also "infer" alignment for the and of ptrtoint pattern?

Contributor

We shouldn't switch tests to generated checks in a functional PR.

Contributor Author

sorry, fixed

@egorshamshura
Contributor Author

I have now added a version that handles the alignment information in ValueTracking. Let me know if you decide it should be handled in InferAlignment instead.

Comment on lines 2378 to 2380
return Known.countMinTrailingZeros() >=
           Log2(getLoadStoreAlignment(Instruction)) ||
       Instruction->getFunction() != CxtFunc ||
Contributor

I don't see why you need the first condition.

Suggested change:
- return Known.countMinTrailingZeros() >=
-            Log2(getLoadStoreAlignment(Instruction)) ||
-        Instruction->getFunction() != CxtFunc ||
+ return Instruction->getFunction() != CxtFunc ||

Member

See my previous comment #166935 (comment).

Contributor

Ah OK, but putting that check in a function called InvalidUser is pretty misleading! In fact I don't think having InvalidUser is helping at all and it should just be inlined.

Member

Yeah. It deserves a better name. I am not sure if we will leverage the align attributes on the pointer parameters of memory intrinsics in the future. I'd adjust the lambda to something like auto RefineKnownBitsWithAlign = [](Instruction *I, Align alignment).

Member

Ah, I see the branches for load/store are merged into one if statement with getLoadStorePointerOperand. It is ok to inline the logic for now.

Contributor Author

Done

@dtcxzyw
Member

dtcxzyw commented Nov 14, 2025

This patch has a significant compile-time impact: https://llvm-compile-time-tracker.com/compare.php?from=3d4156700ef2ea678c3dabf796bc8622ac0d7ecb&to=3ab8d44570e1dd7d7010b3bce624dc3f26141cb7&stat=instructions:u We should limit the number of uses to inspect (e.g., <= DomConditionsMaxUses).
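
A conservative sketch of such a cap, assuming the existing DomConditionsMaxUses option in ValueTracking.cpp is reused (illustrative only):

unsigned NumUsesExplored = 0;
for (const User *U : V->users()) {
  // Bound compile time: give up on very long use lists.
  if (++NumUsesExplored > DomConditionsMaxUses)
    break;
  // ... inspect load/store users as above ...
}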

If it is still expensive, we may move the logic into InferAlignment and only handle some specific patterns.

@nikic
Contributor

nikic commented Nov 17, 2025

I think that making the limit low enough to avoid the compile time impact will make this optimization quite unreliable. While any given load/store user will likely have the same alignment, it's likely that the one inspected user will either be a GEP or not dominate. And of course, the picked user depends on the use-list order.

Doing this in InferAlignment should be more reliable, with the downside that it runs late in the pipeline and other computeKnownBits users can't benefit.


Labels

backend:AMDGPU, backend:PowerPC, backend:RISC-V, backend:SystemZ, clang, coroutines, llvm:analysis, llvm:globalisel, llvm:instcombine, llvm:transforms
