Skip to content

Conversation

@actinks
Copy link
Contributor

@actinks actinks commented Jul 6, 2025

@actinks actinks requested a review from nikic as a code owner July 6, 2025 09:38
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jul 6, 2025
@llvmbot
Copy link
Member

llvmbot commented Jul 6, 2025

@llvm/pr-subscribers-llvm-analysis

@llvm/pr-subscribers-llvm-transforms

Author: Acthinks Yang (Acthinks)

Changes

Proof: https://alive2.llvm.org/ce/z/5K6q5z
Closes #146642


Full diff: https://github.com/llvm/llvm-project/pull/147182.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp (+110)
  • (added) llvm/test/Transforms/InstCombine/icmp-select-operator-constant.ll (+326)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c6f317a668cfe..ee5ed3b5b75e6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4332,6 +4332,98 @@ Instruction *InstCombinerImpl::foldICmpInstWithConstantNotInt(ICmpInst &I) {
   return nullptr;
 }
 
+/// Try to invert RefOp for C: if RefOp is F(X) for an invertible F, with X
+/// operand 0 of RefOp, return (X, F^-1(C)) such that `X pred F^-1(C)` is true
+/// exactly when `RefOp pred C` is true; otherwise return std::nullopt. Binary
+/// operators are only handled when operand 1 is a ConstantInt.
+/// alive2: https://alive2.llvm.org/ce/z/4jniEb
+static std::optional<std::pair<Value *, Value *>>
+getInvertibleOperandsWithPredicte(const Operator *RefOp, const APInt &C,
+                                  CmpInst::Predicate Pred) {
+  // A ConstantExpr is no BinaryOperator: Op1C would keep a wrong bit width.
+  if (!isa<Instruction>(RefOp))
+    return std::nullopt;
+  APInt Op1C;
+  if (isa<BinaryOperator>(RefOp)) {
+    auto *Op1CI = dyn_cast<ConstantInt>(RefOp->getOperand(1));
+    if (!Op1CI)
+      return std::nullopt;
+    Op1C = Op1CI->getValue();
+  }
+
+  Value *X = RefOp->getOperand(0);
+  auto getOperands = [&](const APInt &A) {
+    return std::make_pair(X, ConstantInt::get(X->getType(), A));
+  };
+  switch (RefOp->getOpcode()) {
+  default:
+    break;
+  case Instruction::Or:
+    if (cast<PossiblyDisjointInst>(RefOp)->isDisjoint() && ((C & Op1C) == Op1C))
+      return getOperands(C ^ Op1C);
+    break;
+  // TODO: add/sub could support nsw/nuw for scmp/ucmp
+  case Instruction::Add:
+    if (CmpInst::isEquality(Pred))
+      return getOperands(C - Op1C);
+    break;
+  case Instruction::Xor:
+    if (CmpInst::isEquality(Pred))
+      return getOperands(C ^ Op1C);
+    break;
+  case Instruction::Sub:
+    if (CmpInst::isEquality(Pred))
+      return getOperands(C + Op1C);
+    break;
+  // alive2: https://alive2.llvm.org/ce/z/WPQznV
+  case Instruction::Shl: {
+    // Z = shl nsw X, Y <=> X = ashr exact Z, Y
+    // Z = shl nuw X, Y <=> X = lshr exact Z, Y
+    // Only invert when shifting C right and back left reproduces C.
+    if (C.ashr(Op1C).shl(Op1C) != C)
+      break;
+    auto *OBO1 = cast<OverflowingBinaryOperator>(RefOp);
+    if (OBO1->hasNoSignedWrap())
+      return getOperands(C.ashr(Op1C));
+    if (OBO1->hasNoUnsignedWrap() && !ICmpInst::isSigned(Pred))
+      return getOperands(C.lshr(Op1C));
+    break;
+  }
+  case Instruction::AShr: {
+    // Z = ashr exact X, Y <=> X = shl nsw Z, Y
+    auto *PEO1 = cast<PossiblyExactOperator>(RefOp);
+    if (PEO1->isExact() && C.shl(Op1C).ashr(Op1C) == C)
+      return getOperands(C.shl(Op1C));
+    break;
+  }
+  case Instruction::LShr: {
+    // Z = lshr exact X, Y <=> X = shl nuw Z, Y
+    auto *PEO1 = cast<PossiblyExactOperator>(RefOp);
+    if (PEO1->isExact() && C.shl(Op1C).lshr(Op1C) == C &&
+        !ICmpInst::isSigned(Pred))
+      return getOperands(C.shl(Op1C));
+    break;
+  }
+  case Instruction::SExt: {
+    // C must be the sign extension of a value of X's narrower type.
+    unsigned NumBits = RefOp->getType()->getScalarSizeInBits();
+    APInt NarrowC = C.trunc(X->getType()->getScalarSizeInBits());
+    if (NarrowC.sext(NumBits) == C)
+      return getOperands(NarrowC);
+    break;
+  }
+  case Instruction::ZExt: {
+    // C must be the zero extension of a value of X's narrower type.
+    unsigned NumBits = RefOp->getType()->getScalarSizeInBits();
+    APInt NarrowC = C.trunc(X->getType()->getScalarSizeInBits());
+    if (NarrowC.zext(NumBits) == C && !ICmpInst::isSigned(Pred))
+      return getOperands(NarrowC);
+    break;
+  }
+  }
+  return std::nullopt;
+}
+
 Instruction *InstCombinerImpl::foldSelectICmp(CmpPredicate Pred, SelectInst *SI,
                                               Value *RHS, const ICmpInst &I) {
   // Try to fold the comparison into the select arms, which will cause the
@@ -4391,6 +4483,24 @@ Instruction *InstCombinerImpl::foldSelectICmp(CmpPredicate Pred, SelectInst *SI,
     return SelectInst::Create(SI->getOperand(0), Op1, Op2);
   }
 
+  // fold select with constants and invertible op
+  Value *Cond;
+  const APInt *C1, *C2;
+  auto *RHSOp = dyn_cast<Operator>(RHS);
+  if (RHSOp &&
+      match(SI, m_OneUse(m_Select(m_Value(Cond), m_APInt(C1), m_APInt(C2))))) {
+    if (auto Values0 = getInvertibleOperandsWithPredicte(RHSOp, *C1, Pred)) {
+      if (auto Values1 = getInvertibleOperandsWithPredicte(RHSOp, *C2, Pred)) {
+        assert(Values0->first == Values1->first &&
+               "Invertible Operand0 mismatch");
+        auto *NewSI = Builder.CreateSelect(Cond, Values0->second,
+                                           Values1->second, SI->getName());
+        // Pred is already swapped when the select is I's second operand.
+        return new ICmpInst(Pred, NewSI, Values0->first, I.getName());
+      }
+    }
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/icmp-select-operator-constant.ll b/llvm/test/Transforms/InstCombine/icmp-select-operator-constant.ll
new file mode 100644
index 0000000000000..944d0a00bfe75
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-select-operator-constant.ll
@@ -0,0 +1,326 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+; shl nsw
+; scmp
+define i1 @shl_nsw_scmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @shl_nsw_scmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 1, i8 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = shl nsw i8 %a, 3
+  %sel = select i1 %cond, i8 8, i8 16
+  %cmp = icmp sgt i8 %a_shl, %sel
+  ret i1 %cmp
+}
+; scmp commutative
+define i1 @c_shl_nsw_scmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @c_shl_nsw_scmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 1, i8 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %sel = select i1 %cond, i8 8, i8 16
+  %a_shl = shl nsw i8 %a, 3
+  %cmp = icmp sgt i8 %sel, %a_shl
+  ret i1 %cmp
+}
+; scmp mismatch
+define i1 @shl_nsw_scmp_mismatch(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @shl_nsw_scmp_mismatch(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[A_SHL:%.*]] = shl nsw i8 [[A]], 3
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], i8 8, i8 1
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[A_SHL]], [[SEL]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = shl nsw i8 %a, 3
+  %sel = select i1 %cond, i8 8, i8 1
+  %cmp = icmp sgt i8 %a_shl, %sel
+  ret i1 %cmp
+}
+; ucmp
+define i1 @shl_nsw_ucmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @shl_nsw_ucmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 1, i8 3
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = shl nsw i8 %a, 3
+  %sel = select i1 %cond, i8 8, i8 24
+  %cmp = icmp ugt i8 %a_shl, %sel
+  ret i1 %cmp
+}
+
+; shl nuw only ucmp/eq/ne
+; ucmp
+define i1 @shl_nuw_ucmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @shl_nuw_ucmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 1, i8 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = shl nuw i8 %a, 3
+  %sel = select i1 %cond, i8 8, i8 32
+  %cmp = icmp ult i8 %a_shl, %sel
+  ret i1 %cmp
+}
+
+; eq
+define i1 @shl_nuw_eqcmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @shl_nuw_eqcmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 1, i8 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = shl nuw i8 %a, 3
+  %sel = select i1 %cond, i8 8, i8 64
+  %cmp = icmp eq i8 %sel, %a_shl
+  ret i1 %cmp
+}
+
+; scmp mismatch
+define i1 @shl_nuw_scmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @shl_nuw_scmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[A_SHL:%.*]] = shl nuw i8 [[A]], 3
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], i8 8, i8 32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[A_SHL]], [[SEL]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = shl nuw i8 %a, 3
+  %sel = select i1 %cond, i8 8, i8 32
+  %cmp = icmp slt i8 %a_shl, %sel
+  ret i1 %cmp
+}
+
+; ashr exact
+; ucmp
+define i1 @ashr_exact_ucmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @ashr_exact_ucmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 48, i8 16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = ashr exact i8 %a, 2
+  %sel = select i1 %cond, i8 12, i8 4
+  %cmp = icmp uge i8 %a_shl, %sel
+  ret i1 %cmp
+}
+; scmp
+define i1 @ashr_exact_scmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @ashr_exact_scmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 32, i8 16
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = ashr exact i8 %a, 2
+  %sel = select i1 %cond, i8 8, i8 4
+  %cmp = icmp sgt i8 %a_shl, %sel
+  ret i1 %cmp
+}
+
+; lshr exact only ucmp/eq/ne
+; ucmp
+define i1 @lshr_exact_ucmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @lshr_exact_ucmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 2, i8 6
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = lshr exact i8 %a, 1
+  %sel = select i1 %cond, i8 1, i8 3
+  %cmp = icmp ugt i8 %a_shl, %sel
+  ret i1 %cmp
+}
+; scmp: still folded, the CHECK lines show it as an unsigned compare
+define i1 @lshr_exact_scmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @lshr_exact_scmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 2, i8 6
+; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %a_shl = lshr exact i8 %a, 1
+  %sel = select i1 %cond, i8 1, i8 3
+  %cmp = icmp sge i8 %a_shl, %sel
+  ret i1 %cmp
+}
+
+; zext only ucmp/eq/ne
+; ucmp
+define i1 @zext_ucmp(i8 %a, i16 %c0, i16 %c1, i1 %cond) {
+; CHECK-LABEL: define i1 @zext_ucmp(
+; CHECK-SAME: i8 [[A:%.*]], i16 [[C0:%.*]], i16 [[C1:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 -128, i8 64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %idx = zext i8 %a to i16
+  %sel = select i1 %cond, i16 128, i16 64
+  %cmp = icmp ult i16 %idx, %sel
+  ret i1 %cmp
+}
+; scmp: still folded, the CHECK lines show it as an unsigned compare
+define i1 @zext_scmp_mismatch(i8 %a, i16 %c0, i16 %c1, i1 %cond) {
+; CHECK-LABEL: define i1 @zext_scmp_mismatch(
+; CHECK-SAME: i8 [[A:%.*]], i16 [[C0:%.*]], i16 [[C1:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 -128, i8 64
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %idx = zext i8 %a to i16
+  %sel = select i1 %cond, i16 128, i16 64
+  %cmp = icmp slt i16 %idx, %sel
+  ret i1 %cmp
+}
+
+; sext
+; ucmp
+define i1 @sext_ucmp(i8 %a, i16 %c0, i16 %c1, i1 %cond) {
+; CHECK-LABEL: define i1 @sext_ucmp(
+; CHECK-SAME: i8 [[A:%.*]], i16 [[C0:%.*]], i16 [[C1:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 -127, i8 126
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %idx = sext i8 %a to i16
+  %sel = select i1 %cond, i16 -127, i16 126
+  %cmp = icmp ult i16 %idx, %sel
+  ret i1 %cmp
+}
+; ucmp mismatch
+define i1 @sext_ucmp_mismatch(i8 %a, i16 %c0, i16 %c1, i1 %cond) {
+; CHECK-LABEL: define i1 @sext_ucmp_mismatch(
+; CHECK-SAME: i8 [[A:%.*]], i16 [[C0:%.*]], i16 [[C1:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[IDX:%.*]] = sext i8 [[A]] to i16
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], i16 -129, i16 128
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i16 [[SEL]], [[IDX]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %idx = sext i8 %a to i16
+  %sel = select i1 %cond, i16 -129, i16 128
+  %cmp = icmp ult i16 %idx, %sel
+  ret i1 %cmp
+}
+; scmp
+define i1 @sext_scmp(i8 %a, i16 %c0, i16 %c1, i1 %cond) {
+; CHECK-LABEL: define i1 @sext_scmp(
+; CHECK-SAME: i8 [[A:%.*]], i16 [[C0:%.*]], i16 [[C1:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 -5, i8 9
+; CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %idx = sext i8 %a to i16
+  %sel = select i1 %cond, i16 -5, i16 9
+  %cmp = icmp slt i16 %idx, %sel
+  ret i1 %cmp
+}
+
+; or disjoint
+; ucmp
+define i1 @or_disjoint_ucmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @or_disjoint_ucmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 8, i8 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %or = or disjoint i8 %a, 3
+  %sel = select i1 %cond, i8 11, i8 7
+  %cmp = icmp ult i8 %or, %sel
+  ret i1 %cmp
+}
+; scmp
+define i1 @or_disjoint_scmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @or_disjoint_scmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 8, i8 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %or = or disjoint i8 %a, 3
+  %sel = select i1 %cond, i8 11, i8 7
+  %cmp = icmp slt i8 %or, %sel
+  ret i1 %cmp
+}
+; mismatch constant '4' not disjoint
+define i1 @or_ucmp_mismatch(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @or_ucmp_mismatch(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[OR:%.*]] = or disjoint i8 [[A]], 3
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], i8 11, i8 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i8 [[OR]], [[SEL]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %or = or disjoint i8 %a, 3
+  %sel = select i1 %cond, i8 11, i8 4
+  %cmp = icmp ult i8 %or, %sel
+  ret i1 %cmp
+}
+
+; sub only eq/ne
+define i1 @sub_eq(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @sub_eq(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND]], i8 4, i8 12
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %sub = sub i8 %a, 5
+  %sel = select i1 %cond, i8 -1, i8 7
+  %cmp = icmp eq i8 %sub, %sel
+  ret i1 %cmp
+}
+; ucmp mismatch
+define i1 @sub_ucmp(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @sub_ucmp(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = add i8 [[A]], -13
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ult i8 [[TMP1]], -8
+; CHECK-NEXT:    [[NOT_COND:%.*]] = xor i1 [[COND]], true
+; CHECK-NEXT:    [[CMP:%.*]] = select i1 [[NOT_COND]], i1 [[CMP1]], i1 false
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %sub = sub i8 %a, 5
+  %sel = select i1 %cond, i8 -1, i8 7
+  %cmp = icmp ugt i8 %sub, %sel
+  ret i1 %cmp
+}
+
+; add only eq/ne
+define i1 @add_ne(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @add_ne(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[COND]], i8 -6, i8 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i8 [[TMP1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %sub = add i8 %a, 5
+  %sel = select i1 %cond, i8 -1, i8 7
+  %cmp = icmp ne i8 %sub, %sel
+  ret i1 %cmp
+}
+
+; xor only eq/ne
+define i1 @xor_eq(i8 %a, i1 %cond) {
+; CHECK-LABEL: define i1 @xor_eq(
+; CHECK-SAME: i8 [[A:%.*]], i1 [[COND:%.*]]) {
+; CHECK-NEXT:    [[SEL1:%.*]] = select i1 [[COND]], i8 -6, i8 2
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[SEL1]], [[A]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+  %sub = xor i8 %a, 5
+  %sel = select i1 %cond, i8 -1, i8 7
+  %cmp = icmp eq i8 %sub, %sel
+  ret i1 %cmp
+}

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can simply extend foldICmpEqualityWithOffset to handle the motivating case.

@github-actions
Copy link

github-actions bot commented Jul 6, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@actinks actinks force-pushed the cmp_select_invertible_const branch from 0431871 to 528aa2b Compare July 6, 2025 15:09
@actinks
Copy link
Contributor Author

actinks commented Jul 7, 2025

We can simply extend foldICmpEqualityWithOffset to handle the motivating case.

Do we need to keep the optimization for ucmp/scmp?

@nikic
Copy link
Contributor

nikic commented Jul 9, 2025

We can simply extend foldICmpEqualityWithOffset to handle the motivating case.

Do we need to keep the optimization for ucmp/scmp?

I think it should be possible to generalize that fold to handle unsigned/signed predicate as well (just changes which ops can be applied).

But probably it makes sense to start with just the equality case.

@nikic
Copy link
Contributor

nikic commented Jul 9, 2025

On the other hand, things like the sext/zext handling wouldn't really fit into how foldICmpEqualityWithOffset works, so maybe there is value in the separate handling...

@llvmbot llvmbot added the llvm:analysis Includes value tracking, cost tables and constant folding label Jul 10, 2025
@actinks actinks force-pushed the cmp_select_invertible_const branch from 351961c to bca4a72 Compare July 10, 2025 12:09
@actinks actinks marked this pull request as draft July 11, 2025 02:44
@actinks
Copy link
Contributor Author

actinks commented Jul 11, 2025

We can simply extend foldICmpEqualityWithOffset to handle the motivating case.

It seems that we cannot simply extend foldICmpEqualityWithOffset to handle shift operations and bit operations that require Op0 and Op1 to satisfy certain properties at the same time. For example, Shl nsw/nuw, AShr exact. The proof is given below:
https://alive2.llvm.org/ce/z/NKvGtR

define i1 @src_shl_nsw_eq(i8 %a, i1 %cond) {
  %a_shl = shl nsw i8 %a, 3
  %sel = select i1 %cond, i8 4, i8 16
  %cmp = icmp eq i8 %a_shl, %sel
  ret i1 %cmp
}

define i1 @tgt_shl_nsw_eq(i8 %a, i1 %cond) {
  %1 = select i1 %cond, i8 0, i8 2
  %cmp = icmp eq i8 %a, %1
  ret i1 %cmp
}

In the actual processing flow of instcombine, it is captured by foldSelectICmp in advance, so the above code will not be optimized out, but this seems unsafe. Actual optimization results:

define i1 @shl_nsw_eq(i8 %a, i1 %cond) {
  %cmp1 = icmp eq i8 %a, 2
  %not.cond = xor i1 %cond, true
  %cmp = select i1 %not.cond, i1 %cmp1, i1 false
  ret i1 %cmp
}

@dtcxzyw @nikic

@actinks actinks requested a review from dtcxzyw July 12, 2025 06:21
@dtcxzyw
Copy link
Member

dtcxzyw commented Jul 19, 2025

We can simply extend foldICmpEqualityWithOffset to handle the motivating case.

It seems that we cannot simply extend foldICmpEqualityWithOffset to handle shift operations and bit operations that require Op0 and Op1 to satisfy certain properties at the same time. For example, Shl nsw/nuw, AShr exact. The proof is given below: https://alive2.llvm.org/ce/z/NKvGtR

define i1 @src_shl_nsw_eq(i8 %a, i1 %cond) {
  %a_shl = shl nsw i8 %a, 3
  %sel = select i1 %cond, i8 4, i8 16
  %cmp = icmp eq i8 %a_shl, %sel
  ret i1 %cmp
}

define i1 @tgt_shl_nsw_eq(i8 %a, i1 %cond) {
  %1 = select i1 %cond, i8 0, i8 2
  %cmp = icmp eq i8 %a, %1
  ret i1 %cmp
}

In the actual processing flow of instcombine, it is captured by foldSelectICmp in advance, so the above code will not be optimized out, but this seems unsafe. Actual optimization results:

define i1 @shl_nsw_eq(i8 %a, i1 %cond) {
  %cmp1 = icmp eq i8 %a, 2
  %not.cond = xor i1 %cond, true
  %cmp = select i1 %not.cond, i1 %cmp1, i1 false
  ret i1 %cmp
}

Yeah, we need an extra check for right shifts with the exact flag. Can we start with handling constant shamts and checking if the right shift doesn't produce poison?

@actinks
Copy link
Contributor Author

actinks commented Jul 23, 2025

We can simply extend foldICmpEqualityWithOffset to handle the motivating case.

It seems that we cannot simply extend foldICmpEqualityWithOffset to handle shift operations and bit operations that require Op0 and Op1 to satisfy certain properties at the same time. For example, Shl nsw/nuw, AShr exact. The proof is given below: https://alive2.llvm.org/ce/z/NKvGtR

define i1 @src_shl_nsw_eq(i8 %a, i1 %cond) {
  %a_shl = shl nsw i8 %a, 3
  %sel = select i1 %cond, i8 4, i8 16
  %cmp = icmp eq i8 %a_shl, %sel
  ret i1 %cmp
}

define i1 @tgt_shl_nsw_eq(i8 %a, i1 %cond) {
  %1 = select i1 %cond, i8 0, i8 2
  %cmp = icmp eq i8 %a, %1
  ret i1 %cmp
}

In the actual processing flow of instcombine, it is captured by foldSelectICmp in advance, so the above code will not be optimized out, but this seems unsafe. Actual optimization results:

define i1 @shl_nsw_eq(i8 %a, i1 %cond) {
  %cmp1 = icmp eq i8 %a, 2
  %not.cond = xor i1 %cond, true
  %cmp = select i1 %not.cond, i1 %cmp1, i1 false
  ret i1 %cmp
}

Yeah, we need an extra check for right shifts with the exact flag. Can we start with handling constant shamts and checking if the right shift doesn't produce poison?

I haven't found a suitable implementation based on foldICmpEqualityWithOffset. A lot of judgment logic needs to be added to the shift class, and the original op and flag need to be kept to determine whether the simplifyBinOp result is available. Is this in line with expectations?

@dtcxzyw
Copy link
Member

dtcxzyw commented Jul 24, 2025

I haven't found a suitable implementation based on foldICmpEqualityWithOffset. A lot of judgment logic needs to be added to the shift class, and the original op and flag need to be kept to determine whether the simplifyBinOp result is available. Is this in line with expectations?

Sounds reasonable.

@actinks actinks force-pushed the cmp_select_invertible_const branch 2 times, most recently from 2ebb4d3 to cfbae3b Compare July 26, 2025 18:19
@actinks actinks marked this pull request as ready for review July 26, 2025 18:35
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. If we need to catch more complicated patterns, we can use MaskedValueIsZero/ComputeNumSignBits in the future.

@actinks actinks changed the title [InstCombine] fold icmp of select with constants and invertible op [InstCombine] fold icmp of select with invertible shl Oct 22, 2025
@actinks actinks requested a review from dtcxzyw November 4, 2025 16:43
@actinks actinks requested a review from nikic November 4, 2025 16:43
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Miscompilation reproducer: https://alive2.llvm.org/ce/z/-TagCh

define i1 @src(i64 %indvars.iv.i, i32 %conv161.i.i.i) {
entry:
  %cmp104.i.i.i = icmp slt i64 %indvars.iv.i, 1
  br i1 %cmp104.i.i.i, label %lor.end157.i.i.i, label %lor.rhs106.i.i.i

lor.rhs106.i.i.i:
  br label %lor.end157.i.i.i

lor.end157.i.i.i:
  %conv159.i.i.i = zext i1 %cmp104.i.i.i to i16
  %sub.i122.i.i.i = sub i16 0, %conv159.i.i.i
  %conv161.i.i.i2 = zext i16 %sub.i122.i.i.i to i32
  %shl.i.i.i.i = shl nuw i32 %conv161.i.i.i, 31
  %cond.i124.i.i.i = select i1 %cmp104.i.i.i, i32 %conv161.i.i.i2, i32 %shl.i.i.i.i
  %conv163.i.i.i = zext i32 %cond.i124.i.i.i to i64
  %tobool166.not.i.i.i = icmp eq i64 %conv163.i.i.i, 0
  ret i1 %tobool166.not.i.i.i
}

define i1 @tgt(i64 %indvars.iv.i, i32 %conv161.i.i.i) {
entry:
  %cmp104.i.i.i = icmp slt i64 %indvars.iv.i, 1
  br i1 %cmp104.i.i.i, label %lor.end157.i.i.i, label %lor.rhs106.i.i.i

lor.rhs106.i.i.i:                                 ; preds = %entry
  %0 = icmp eq i32 %conv161.i.i.i, 0
  br label %lor.end157.i.i.i

lor.end157.i.i.i:                                 ; preds = %lor.rhs106.i.i.i, %entry
  %tobool166.not.i.i.i = phi i1 [ %0, %lor.rhs106.i.i.i ], [ true, %entry ]
  ret i1 %tobool166.not.i.i.i
}

@actinks
Copy link
Contributor Author

actinks commented Nov 9, 2025

Miscompilation reproducer: https://alive2.llvm.org/ce/z/-TagCh

Is this patch causing this error? My compilation result is https://alive2.llvm.org/ce/z/enaJTE @dtcxzyw

define i1 @src(i64 %indvars.iv.i, i32 %conv161.i.i.i) {
entry:
  %cmp104.i.i.i = icmp slt i64 %indvars.iv.i, 1
  br i1 %cmp104.i.i.i, label %lor.end157.i.i.i, label %lor.rhs106.i.i.i

lor.rhs106.i.i.i:
  br label %lor.end157.i.i.i

lor.end157.i.i.i:
  %conv159.i.i.i = zext i1 %cmp104.i.i.i to i16
  %sub.i122.i.i.i = sub i16 0, %conv159.i.i.i
  %conv161.i.i.i2 = zext i16 %sub.i122.i.i.i to i32
  %shl.i.i.i.i = shl nuw i32 %conv161.i.i.i, 31
  %cond.i124.i.i.i = select i1 %cmp104.i.i.i, i32 %conv161.i.i.i2, i32 %shl.i.i.i.i
  %conv163.i.i.i = zext i32 %cond.i124.i.i.i to i64
  %tobool166.not.i.i.i = icmp eq i64 %conv163.i.i.i, 0
  ret i1 %tobool166.not.i.i.i
}

define i1 @tgt(i64 %indvars.iv.i, i32 %conv161.i.i.i) {
entry:
  %cmp104.i.i.i = icmp sgt i64 %indvars.iv.i, 0
  br i1 %cmp104.i.i.i, label %lor.rhs106.i.i.i, label %lor.end157.i.i.i

lor.rhs106.i.i.i:                                 ; preds = %entry
  br label %lor.end157.i.i.i

lor.end157.i.i.i:                                 ; preds = %lor.rhs106.i.i.i, %entry
  %tobool166.not.i.i.i1 = icmp eq i32 %conv161.i.i.i, 0
  %tobool166.not.i.i.i = select i1 %cmp104.i.i.i, i1 %tobool166.not.i.i.i1, i1 false
  ret i1 %tobool166.not.i.i.i
}

@dtcxzyw
Copy link
Member

dtcxzyw commented Nov 9, 2025

Miscompilation reproducer: https://alive2.llvm.org/ce/z/-TagCh

Is this patch causing this error? My compilation result is https://alive2.llvm.org/ce/z/enaJTE @dtcxzyw

define i1 @src(i64 %indvars.iv.i, i32 %conv161.i.i.i) {
entry:
  %cmp104.i.i.i = icmp slt i64 %indvars.iv.i, 1
  br i1 %cmp104.i.i.i, label %lor.end157.i.i.i, label %lor.rhs106.i.i.i

lor.rhs106.i.i.i:
  br label %lor.end157.i.i.i

lor.end157.i.i.i:
  %conv159.i.i.i = zext i1 %cmp104.i.i.i to i16
  %sub.i122.i.i.i = sub i16 0, %conv159.i.i.i
  %conv161.i.i.i2 = zext i16 %sub.i122.i.i.i to i32
  %shl.i.i.i.i = shl nuw i32 %conv161.i.i.i, 31
  %cond.i124.i.i.i = select i1 %cmp104.i.i.i, i32 %conv161.i.i.i2, i32 %shl.i.i.i.i
  %conv163.i.i.i = zext i32 %cond.i124.i.i.i to i64
  %tobool166.not.i.i.i = icmp eq i64 %conv163.i.i.i, 0
  ret i1 %tobool166.not.i.i.i
}

define i1 @tgt(i64 %indvars.iv.i, i32 %conv161.i.i.i) {
entry:
  %cmp104.i.i.i = icmp sgt i64 %indvars.iv.i, 0
  br i1 %cmp104.i.i.i, label %lor.rhs106.i.i.i, label %lor.end157.i.i.i

lor.rhs106.i.i.i:                                 ; preds = %entry
  br label %lor.end157.i.i.i

lor.end157.i.i.i:                                 ; preds = %lor.rhs106.i.i.i, %entry
  %tobool166.not.i.i.i1 = icmp eq i32 %conv161.i.i.i, 0
  %tobool166.not.i.i.i = select i1 %cmp104.i.i.i, i1 %tobool166.not.i.i.i1, i1 false
  ret i1 %tobool166.not.i.i.i
}

Confirmed. I forgot to include the datalayout string in the reproducer. Sorry about this.

; bin/opt -passes=instcombine test.ll -S
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i1 @src(i64 %indvars.iv.i, i32 %conv161.i.i.i) {
entry:
  %cmp104.i.i.i = icmp slt i64 %indvars.iv.i, 1
  br i1 %cmp104.i.i.i, label %lor.end157.i.i.i, label %lor.rhs106.i.i.i

lor.rhs106.i.i.i:                                 ; preds = %entry
  br label %lor.end157.i.i.i

lor.end157.i.i.i:                                 ; preds = %lor.rhs106.i.i.i, %entry
  %conv159.i.i.i = zext i1 %cmp104.i.i.i to i16
  %sub.i122.i.i.i = sub i16 0, %conv159.i.i.i
  %conv161.i.i.i2 = zext i16 %sub.i122.i.i.i to i32
  %shl.i.i.i.i = shl nuw i32 %conv161.i.i.i, 31
  %cond.i124.i.i.i = select i1 %cmp104.i.i.i, i32 %conv161.i.i.i2, i32 %shl.i.i.i.i
  %conv163.i.i.i = zext i32 %cond.i124.i.i.i to i64
  %tobool166.not.i.i.i = icmp eq i64 %conv163.i.i.i, 0
  ret i1 %tobool166.not.i.i.i
}

With this patch:

define i1 @src(i64 %indvars.iv.i, i32 %conv161.i.i.i) {
entry:
  %cmp104.i.i.i = icmp slt i64 %indvars.iv.i, 1
  br i1 %cmp104.i.i.i, label %lor.end157.i.i.i, label %lor.rhs106.i.i.i

lor.rhs106.i.i.i:                                 ; preds = %entry
  %0 = icmp eq i32 %conv161.i.i.i, 0
  br label %lor.end157.i.i.i

lor.end157.i.i.i:                                 ; preds = %lor.rhs106.i.i.i, %entry
  %tobool166.not.i.i.i = phi i1 [ %0, %lor.rhs106.i.i.i ], [ true, %entry ]
  ret i1 %tobool166.not.i.i.i
}

Without this patch: https://alive2.llvm.org/ce/z/eL3WkX

define i1 @src(i64 %indvars.iv.i, i32 %conv161.i.i.i) {
entry:
  %cmp104.i.i.i = icmp slt i64 %indvars.iv.i, 1
  br i1 %cmp104.i.i.i, label %lor.end157.i.i.i, label %lor.rhs106.i.i.i

lor.rhs106.i.i.i:                                 ; preds = %entry
  br label %lor.end157.i.i.i

lor.end157.i.i.i:                                 ; preds = %lor.rhs106.i.i.i, %entry
  %conv161.i.i.i2 = phi i32 [ 0, %lor.rhs106.i.i.i ], [ 65535, %entry ]
  %shl.i.i.i.i = shl nuw i32 %conv161.i.i.i, 31
  %cond.i124.i.i.i = select i1 %cmp104.i.i.i, i32 %conv161.i.i.i2, i32 %shl.i.i.i.i
  %tobool166.not.i.i.i = icmp eq i32 %cond.i124.i.i.i, 0
  ret i1 %tobool166.not.i.i.i
}

If it is still not reproducible, use the base commit 7734276.

@actinks actinks force-pushed the cmp_select_invertible_const branch from 8c33523 to 34d2f1d Compare November 9, 2025 15:49
; CHECK-LABEL: @shl_const_phi_failed_to_simplify(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i64 [[INDVARS:%.*]], 1
; CHECK-NEXT: br i1 [[CMP_SLT]], label [[END:%.*]], label [[THEN:%.*]]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks @dtcxzyw. The issue was caused by the "right-shift mode" only intercepting constants and failing to intercept phi nodes with two constant incoming values; it has been temporarily resolved by reverting to a previous version.

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@dtcxzyw
Copy link
Member

dtcxzyw commented Nov 10, 2025

I think you have met the requirements to request commit access. See https://llvm.org/docs/DeveloperPolicy.html#obtaining-commit-access

@actinks
Copy link
Contributor Author

actinks commented Nov 10, 2025

I think you have met the requirements to request commit access. See https://llvm.org/docs/DeveloperPolicy.html#obtaining-commit-access

Oh, thank you so much @dtcxzyw

@actinks actinks merged commit 583fba3 into llvm:main Nov 28, 2025
10 checks passed
aahrun pushed a commit to aahrun/llvm-project that referenced this pull request Dec 1, 2025
augusto2112 pushed a commit to augusto2112/llvm-project that referenced this pull request Dec 3, 2025
kcloudy0717 pushed a commit to kcloudy0717/llvm-project that referenced this pull request Dec 4, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

llvm:analysis Includes value tracking, cost tables and constant folding llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[InstCombine] Missed optimization for icmp of select with constants and invertible binop

4 participants