Conversation

@bababuck (Contributor) commented Nov 25, 2025

The following C code:

bool or_icmp(int type) {
  return type == 0 || type == 6 || type == 15;
}

Currently lowers to:

define i1 @or_icmp(i32 signext %type) {
entry:
  %cmp = icmp eq i32 %type, 6
  %cmp1 = icmp eq i32 %type, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp3 = icmp eq i32 %type, 15
  %or.cond1 = or i1 %cmp3, %or.cond
  ret i1 %or.cond1
}

But could more optimally lower to:

define i1 @or_icmp(i32 signext %type) {
entry:
  %srl = lshr i32 32833, %type
  %srl.1 = trunc i32 %srl to i1
  %cmp = icmp ult i32 %type, 64
  %and = and i1 %srl.1, %cmp
  ret i1 %and
}
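
For intuition, here is a minimal C++ sketch of what the optimized form computes (my illustration; the function name is mine, and the guard is 32 here because the C-level mask is 32 bits wide, whereas the IR above compares against XLen). The constant 32833 is 0x8041, i.e. bits 0, 6, and 15 set, one per accepted value:

// Membership test for {0, 6, 15} via one shift of a precomputed mask:
//   (1u << 0) | (1u << 6) | (1u << 15) == 0x8041 == 32833
bool orIcmpMask(int type) {
  unsigned u = static_cast<unsigned>(type);
  // Guard first: shifting a 32-bit value by >= 32 is undefined
  // behavior, and no value outside [0, 31] is in the set anyway.
  return u < 32 && ((32833u >> u) & 1u);
}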

Alive2 proofs for the test cases:
or_icmp_2: https://alive2.llvm.org/ce/z/9W4PZJ

or_icmp_3: https://alive2.llvm.org/ce/z/zrVHSh

or_icmp_7: https://alive2.llvm.org/ce/z/yBoMer

or_icmp_i64: https://alive2.llvm.org/ce/z/Jas5gE

or_icmp_expand: https://alive2.llvm.org/ce/z/waU32g

or_icmp_expand_trunc_type_shr: https://alive2.llvm.org/ce/z/Catc9V

or_icmp_expand_zext_cmp: https://alive2.llvm.org/ce/z/a7-d3B

or_icmp_i128: https://alive2.llvm.org/ce/z/ng6WJR

or_imcp_expand_128: https://alive2.llvm.org/ce/z/vy39kW

@llvmbot (Member) commented Nov 25, 2025

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-risc-v

Author: Ryan Buchner (bababuck)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/169557.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+49)
  • (added) llvm/test/CodeGen/RISCV/flag_check.ll (+241)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 3b250d7d9ad1f..50076dbb4555e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -16736,6 +16736,52 @@ static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
 }
 
+// or (icmp eq x, imm0), (icmp eq x, imm1) -> czero.eqz (sltui x, 64), (bext x,
+// 1 << imm0 | 1 << imm1) If [imm0, imm1] < 64
+static SDValue combineOrOfImmCmpToBitExtract(SDNode *N, SelectionDAG &DAG,
+                                             const RISCVSubtarget &Subtarget) {
+  using namespace SDPatternMatch;
+
+  auto CollectSetEqImmTree = [](auto &&Self, SmallVector<APInt, 4> &FlagVals,
+                                SDNode *N, SDValue &X) -> bool {
+    APInt Imm;
+    if (X ? sd_match(N, m_OneUse(m_SetCC(m_Specific(X), m_ConstInt(Imm),
+                                         m_SpecificCondCode(ISD::SETEQ))))
+          : sd_match(N, m_OneUse(m_SetCC(m_Value(X), m_ConstInt(Imm),
+                                         m_SpecificCondCode(ISD::SETEQ))))) {
+      FlagVals.push_back(Imm);
+      return true;
+    }
+    SDValue LHS, RHS;
+    if (sd_match(N, m_OneUse(m_Or(m_Value(LHS), m_Value(RHS))))) {
+      return Self(Self, FlagVals, LHS.getNode(), X) &&
+             Self(Self, FlagVals, RHS.getNode(), X);
+    }
+    return false;
+  };
+
+  SmallVector<APInt, 4> FlagVals;
+  SDValue X;
+  if (!CollectSetEqImmTree(CollectSetEqImmTree, FlagVals, N, X))
+    return SDValue();
+
+  unsigned XLen = Subtarget.getXLen();
+  uint64_t BitMask = 0;
+  for (auto &Imm : FlagVals) {
+    if (Imm.uge(XLen))
+      return SDValue();
+    BitMask |= ((uint64_t)1 << Imm.getZExtValue());
+  }
+
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+  SDValue BitExtract =
+      DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(BitMask, DL, VT), X);
+  SDValue Lt64Check =
+      DAG.getSetCC(DL, VT, X, DAG.getConstant(XLen, DL, VT), ISD::SETULT);
+  return DAG.getNode(ISD::AND, DL, VT, Lt64Check, BitExtract);
+}
+
 static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                 const RISCVSubtarget &Subtarget) {
   SelectionDAG &DAG = DCI.DAG;
@@ -16748,6 +16794,9 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
     return V;
   if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
     return V;
+  if (DCI.isAfterLegalizeDAG())
+    if (SDValue V = combineOrOfImmCmpToBitExtract(N, DAG, Subtarget))
+      return V;
 
   if (DCI.isAfterLegalizeDAG())
     if (SDValue V = combineDeMorganOfBoolean(N, DAG))
diff --git a/llvm/test/CodeGen/RISCV/flag_check.ll b/llvm/test/CodeGen/RISCV/flag_check.ll
new file mode 100644
index 0000000000000..86049bf53379c
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/flag_check.ll
@@ -0,0 +1,241 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+
+define i1 @or_icmp_2(i32 signext %type) {
+; RV32-LABEL: or_icmp_2:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    li a1, 65
+; RV32-NEXT:    srl a1, a1, a0
+; RV32-NEXT:    sltiu a0, a0, 32
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: or_icmp_2:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a1, 65
+; RV64-NEXT:    srl a1, a1, a0
+; RV64-NEXT:    sltiu a0, a0, 64
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %type, 6
+  %cmp1 = icmp eq i32 %type, 0
+  %or.cond = or i1 %cmp, %cmp1
+  ret i1 %or.cond
+}
+
+define i1 @or_icmp_3(i32 signext %type) {
+; RV32-LABEL: or_icmp_3:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lui a1, 8
+; RV32-NEXT:    addi a1, a1, 65
+; RV32-NEXT:    srl a1, a1, a0
+; RV32-NEXT:    sltiu a0, a0, 32
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: or_icmp_3:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    addi a1, a1, 65
+; RV64-NEXT:    srl a1, a1, a0
+; RV64-NEXT:    sltiu a0, a0, 64
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %type, 6
+  %cmp1 = icmp eq i32 %type, 0
+  %or.cond = or i1 %cmp, %cmp1
+  %cmp3 = icmp eq i32 %type, 15
+  %or.cond1 = or i1 %cmp3, %or.cond
+  ret i1 %or.cond1
+}
+
+define i1 @or_icmp_4_tree(i32 signext %type) {
+; RV32-LABEL: or_icmp_4_tree:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lui a1, 1032
+; RV32-NEXT:    addi a1, a1, 65
+; RV32-NEXT:    srl a1, a1, a0
+; RV32-NEXT:    sltiu a0, a0, 32
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: or_icmp_4_tree:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lui a1, 1032
+; RV64-NEXT:    addi a1, a1, 65
+; RV64-NEXT:    srl a1, a1, a0
+; RV64-NEXT:    sltiu a0, a0, 64
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %type, 6
+  %cmp1 = icmp eq i32 %type, 0
+  %or.cond = or i1 %cmp, %cmp1
+  %cmp2 = icmp eq i32 %type, 15
+  %cmp3 = icmp eq i32 %type, 22
+  %or.cond1 = or i1 %cmp2, %cmp3
+  %or.cond2 = or i1 %or.cond1, %or.cond
+  ret i1 %or.cond2
+}
+
+define i1 @or_icmp_7(i32 signext %type) {
+; RV32-LABEL: or_icmp_7:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    lui a1, 589860
+; RV32-NEXT:    addi a1, a1, 73
+; RV32-NEXT:    srl a1, a1, a0
+; RV32-NEXT:    sltiu a0, a0, 32
+; RV32-NEXT:    and a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: or_icmp_7:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lui a1, 147465
+; RV64-NEXT:    slli a1, a1, 2
+; RV64-NEXT:    addi a1, a1, 73
+; RV64-NEXT:    srl a1, a1, a0
+; RV64-NEXT:    sltiu a0, a0, 64
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %type, 6
+  %cmp1 = icmp eq i32 %type, 0
+  %or.cond = or i1 %cmp, %cmp1
+  %cmp2 = icmp eq i32 %type, 17
+  %or.cond1 = or i1 %cmp2, %or.cond
+  %cmp3 = icmp eq i32 %type, 3
+  %or.cond2 = or i1 %cmp3, %or.cond1
+  %cmp4 = icmp eq i32 %type, 31
+  %or.cond3 = or i1 %cmp4, %or.cond2
+  %cmp5 = icmp eq i32 %type, 14
+  %or.cond4 = or i1 %cmp5, %or.cond3
+  %cmp6 = icmp eq i32 %type, 28
+  %or.cond5 = or i1 %cmp6, %or.cond4
+  ret i1 %or.cond5
+}
+
+define i1 @or_icmp_gte_64(i32 signext %type) {
+; CHECK-LABEL: or_icmp_gte_64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a1, a0, -6
+; CHECK-NEXT:    addi a0, a0, -64
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %type, 6
+  %cmp1 = icmp eq i32 %type, 64
+  %or.cond = or i1 %cmp, %cmp1
+  ret i1 %or.cond
+}
+
+define i1 @or_icmp_multiple_uses(i32 signext %type) {
+; CHECK-LABEL: or_icmp_multiple_uses:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a1, a0, -6
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    xor a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %type, 6
+  %cmp1 = icmp eq i32 %type, 0
+  %or.cond = or i1 %cmp, %cmp1
+  %or.cond1 = xor i1 %cmp, %or.cond
+  ret i1 %or.cond1
+}
+
+
+define i1 @or_icmp_not_eq(i32 signext %type) {
+; CHECK-LABEL: or_icmp_not_eq:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a1, a0, -6
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    snez a0, a0
+; CHECK-NEXT:    or a0, a1, a0
+; CHECK-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %type, 6
+  %cmp1 = icmp ugt i32 %type, 0
+  %or.cond = or i1 %cmp, %cmp1
+  ret i1 %or.cond
+}
+
+define i1 @or_icmp_xlen(i32 signext %type) {
+; RV32-LABEL: or_icmp_xlen:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    addi a1, a0, -6
+; RV32-NEXT:    addi a0, a0, -32
+; RV32-NEXT:    seqz a1, a1
+; RV32-NEXT:    seqz a0, a0
+; RV32-NEXT:    or a0, a1, a0
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: or_icmp_xlen:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    slli a1, a1, 32
+; RV64-NEXT:    addi a1, a1, 64
+; RV64-NEXT:    srl a1, a1, a0
+; RV64-NEXT:    sltiu a0, a0, 64
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %type, 6
+  %cmp1 = icmp eq i32 %type, 32
+  %or.cond = or i1 %cmp, %cmp1
+  ret i1 %or.cond
+}
+
+define i1 @or_icmp_i64(i64 signext %type) {
+; RV32-LABEL: or_icmp_i64:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    xori a2, a0, 6
+; RV32-NEXT:    or a3, a0, a1
+; RV32-NEXT:    xori a0, a0, 15
+; RV32-NEXT:    or a2, a2, a1
+; RV32-NEXT:    seqz a3, a3
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    seqz a1, a2
+; RV32-NEXT:    or a1, a1, a3
+; RV32-NEXT:    seqz a0, a0
+; RV32-NEXT:    or a0, a0, a1
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: or_icmp_i64:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    lui a1, 8
+; RV64-NEXT:    addi a1, a1, 65
+; RV64-NEXT:    srl a1, a1, a0
+; RV64-NEXT:    sltiu a0, a0, 64
+; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    ret
+entry:
+  %cmp = icmp eq i64 %type, 6
+  %cmp1 = icmp eq i64 %type, 0
+  %or.cond = or i1 %cmp, %cmp1
+  %cmp3 = icmp eq i64 %type, 15
+  %or.cond1 = or i1 %cmp3, %or.cond
+  ret i1 %or.cond1
+}
+
+define i1 @or_icmp_specific(i32 signext %type, i32 signext %type1) {
+; CHECK-LABEL: or_icmp_specific:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi a0, a0, -6
+; CHECK-NEXT:    addi a1, a1, -32
+; CHECK-NEXT:    seqz a0, a0
+; CHECK-NEXT:    seqz a1, a1
+; CHECK-NEXT:    or a0, a0, a1
+; CHECK-NEXT:    ret
+entry:
+  %cmp = icmp eq i32 %type, 6
+  %cmp1 = icmp eq i32 %type1, 32
+  %or.cond = or i1 %cmp, %cmp1
+  ret i1 %or.cond
+}

Will move to InstCombine.

The following C code:

bool or_icmp(int type) {
  return type == 0 || type == 6 || type == 15;
}

Currently lowers to:

define i1 @or_icmp(i32 signext %type) {
entry:
  %cmp = icmp eq i32 %type, 6
  %cmp1 = icmp eq i32 %type, 0
  %or.cond = or i1 %cmp, %cmp1
  %cmp3 = icmp eq i32 %type, 15
  %or.cond1 = or i1 %cmp3, %or.cond
  ret i1 %or.cond1
}

But could more optimally lower to:

define i1 @or_icmp(i32 signext %type) {
entry:
  %srl = lshr i32 32833, %type
  %srl.1 = trunc i32 %srl to i1
  %cmp = icmp ult i32 %type, 64
  %and = select i1 %cmp, i1 %srl.1, i1 false
  ret i1 %and
}
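
Note that the updated form uses a select rather than an and, presumably for poison safety: lshr by an amount greater than or equal to the bit width yields poison, and a select whose condition is the range check returns false rather than propagating that poison when the check fails, whereas an and would propagate it.
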
@bababuck requested a review from nikic as a code owner November 26, 2025 22:50
@llvmbot added the llvm:instcombine and llvm:transforms labels Nov 26, 2025
@bababuck changed the title from "[RISCV] Lower flag check pattern to use a bitmask-shift" to "[InstCombine] Lower flag check pattern to use a bitmask-shift" Nov 26, 2025
@github-actions (bot) commented Nov 26, 2025

✅ With the latest revision this PR passed the undef deprecator.

if (!validImm(LHSAP) || !validImm(RHSAP))
  return nullptr;
LHSAP = LHSAP.zextOrTrunc(XLen);
RHSAP = RHSAP.zextOrTrunc(XLen);
@bababuck (Contributor, Author) commented:

I don't know if this is the best way to wrangle the various APInts, which may have differing bitwidths: effectively, I ensure each can be represented within XLen and then force them all to a bitwidth of XLen.
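
For reference, a self-contained sketch of that normalization (my paraphrase, not the patch's code; validImm in the patch also checks that a constant was matched at all, as the next quoted snippet shows):

#include "llvm/ADT/APInt.h"
using llvm::APInt;

// Immediates collected from icmps over i8/i16/i32/i64 arrive as APInts
// of differing bitwidths, and APInt bitwise operations assert unless
// both operands share a width. Reject any immediate too large to name
// a bit of an XLen-wide mask, then force the rest to a common width.
static bool normalizeToXLen(APInt &LHSAP, APInt &RHSAP, unsigned XLen) {
  if (LHSAP.uge(XLen) || RHSAP.uge(XLen))
    return false;                  // bit index would be out of range
  LHSAP = LHSAP.zextOrTrunc(XLen); // value < XLen, so widening or
  RHSAP = RHSAP.zextOrTrunc(XLen); // narrowing preserves the value
  return true;
}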

  return Imm && (*Imm < XLen);
};

// Match (or (icmp eq X, Imm0), (icmp eq X, Imm1))
@bababuck (Contributor, Author) commented:

With just two comparisons the transform is roughly net neutral (at least when lowering through the RISC-V backend); the benefit comes once it expands to combining three or more comparisons. From a code standpoint, though, the implementation seemed cleaner operating on a single "or" at a time.

@lukel97 (Contributor) commented Nov 27, 2025

Does this transform already exist somewhere in LLVM to some degree? Just checking in godbolt: Clang trunk already seems to do a very similar transform on the C snippet in the PR description: https://compiler-explorer.com/z/qTbzc9e33

@dtcxzyw (Member) commented Nov 27, 2025

Does this transform already exist somewhere in LLVM to some degree? Just checking in godbolt: Clang trunk already seems to do a very similar transform on the C snippet in the PR description: https://compiler-explorer.com/z/qTbzc9e33

See SimplifyCFGOpt::simplifyBranchOnICmpChain and simplifySwitchLookup
