From 5d4f0f773bb9a378e4bd7172a7954b8bad5a679e Mon Sep 17 00:00:00 2001 From: bababuck Date: Tue, 25 Nov 2025 09:47:21 -0800 Subject: [PATCH 1/7] [RISCV] Add new test for RISCV flag check optimization --- llvm/test/CodeGen/RISCV/flag_check.ll | 218 ++++++++++++++++++++++++++ 1 file changed, 218 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/flag_check.ll diff --git a/llvm/test/CodeGen/RISCV/flag_check.ll b/llvm/test/CodeGen/RISCV/flag_check.ll new file mode 100644 index 0000000000000..22df3d1859633 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/flag_check.ll @@ -0,0 +1,218 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 + +define i1 @or_icmp_2(i32 signext %type) { +; CHECK-LABEL: or_icmp_2: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + ret i1 %or.cond +} + +define i1 @or_icmp_3(i32 signext %type) { +; CHECK-LABEL: or_icmp_3: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: seqz a2, a0 +; CHECK-NEXT: addi a0, a0, -15 +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp3 = icmp eq i32 %type, 15 + %or.cond1 = or i1 %cmp3, %or.cond + ret i1 %or.cond1 +} + +define i1 @or_icmp_4_tree(i32 signext %type) { +; CHECK-LABEL: or_icmp_4_tree: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: seqz a2, a0 +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: addi 
a2, a0, -15 +; CHECK-NEXT: addi a0, a0, -22 +; CHECK-NEXT: seqz a2, a2 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: or a0, a2, a0 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp2 = icmp eq i32 %type, 15 + %cmp3 = icmp eq i32 %type, 22 + %or.cond1 = or i1 %cmp2, %cmp3 + %or.cond2 = or i1 %or.cond1, %or.cond + ret i1 %or.cond2 +} + +define i1 @or_icmp_7(i32 signext %type) { +; CHECK-LABEL: or_icmp_7: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: seqz a2, a0 +; CHECK-NEXT: addi a3, a0, -17 +; CHECK-NEXT: addi a4, a0, -3 +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: or a1, a1, a2 +; CHECK-NEXT: addi a2, a0, -31 +; CHECK-NEXT: seqz a3, a3 +; CHECK-NEXT: seqz a4, a4 +; CHECK-NEXT: or a3, a4, a3 +; CHECK-NEXT: addi a4, a0, -14 +; CHECK-NEXT: seqz a2, a2 +; CHECK-NEXT: seqz a4, a4 +; CHECK-NEXT: or a2, a4, a2 +; CHECK-NEXT: addi a0, a0, -28 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: or a1, a3, a1 +; CHECK-NEXT: or a0, a0, a2 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp2 = icmp eq i32 %type, 17 + %or.cond1 = or i1 %cmp2, %or.cond + %cmp3 = icmp eq i32 %type, 3 + %or.cond2 = or i1 %cmp3, %or.cond1 + %cmp4 = icmp eq i32 %type, 31 + %or.cond3 = or i1 %cmp4, %or.cond2 + %cmp5 = icmp eq i32 %type, 14 + %or.cond4 = or i1 %cmp5, %or.cond3 + %cmp6 = icmp eq i32 %type, 28 + %or.cond5 = or i1 %cmp6, %or.cond4 + ret i1 %or.cond5 +} + +define i1 @or_icmp_gte_64(i32 signext %type) { +; CHECK-LABEL: or_icmp_gte_64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: addi a0, a0, -64 +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 64 + %or.cond = or i1 %cmp, %cmp1 + ret i1 %or.cond +} + +define i1 
@or_icmp_multiple_uses(i32 signext %type) { +; CHECK-LABEL: or_icmp_multiple_uses: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: xor a0, a1, a0 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %or.cond1 = xor i1 %cmp, %or.cond + ret i1 %or.cond1 +} + + +define i1 @or_icmp_not_eq(i32 signext %type) { +; CHECK-LABEL: or_icmp_not_eq: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: snez a0, a0 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp ugt i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + ret i1 %or.cond +} + +define i1 @or_icmp_xlen(i32 signext %type) { +; CHECK-LABEL: or_icmp_xlen: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a1, a0, -6 +; CHECK-NEXT: addi a0, a0, -32 +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: or a0, a1, a0 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 32 + %or.cond = or i1 %cmp, %cmp1 + ret i1 %or.cond +} + +define i1 @or_icmp_i64(i64 signext %type) { +; RV32-LABEL: or_icmp_i64: +; RV32: # %bb.0: # %entry +; RV32-NEXT: xori a2, a0, 6 +; RV32-NEXT: or a3, a0, a1 +; RV32-NEXT: xori a0, a0, 15 +; RV32-NEXT: or a2, a2, a1 +; RV32-NEXT: seqz a3, a3 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: seqz a1, a2 +; RV32-NEXT: or a1, a1, a3 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: or_icmp_i64: +; RV64: # %bb.0: # %entry +; RV64-NEXT: addi a1, a0, -6 +; RV64-NEXT: seqz a2, a0 +; RV64-NEXT: addi a0, a0, -15 +; RV64-NEXT: seqz a1, a1 +; RV64-NEXT: or a1, a1, a2 +; RV64-NEXT: seqz a0, a0 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: ret +entry: + %cmp = icmp eq i64 %type, 6 + %cmp1 = icmp eq i64 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp3 = icmp eq i64 %type, 15 + 
%or.cond1 = or i1 %cmp3, %or.cond + ret i1 %or.cond1 +} + +define i1 @or_icmp_specific(i32 signext %type, i32 signext %type1) { +; CHECK-LABEL: or_icmp_specific: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a0, a0, -6 +; CHECK-NEXT: addi a1, a1, -32 +; CHECK-NEXT: seqz a0, a0 +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type1, 32 + %or.cond = or i1 %cmp, %cmp1 + ret i1 %or.cond +} From 95514cc36066b52711aacac66871b947c191ade2 Mon Sep 17 00:00:00 2001 From: bababuck Date: Mon, 10 Nov 2025 14:23:02 -0800 Subject: [PATCH 2/7] [RISCV] Lower flag check pattern to use a bitmask-shift The following C-code: bool or_icmp(int type) { return type == 0 || type == 6 || type == 15; } Currently lowers to: define i1 @or_icmp(i32 signext %type) { entry: %cmp = icmp eq i32 %type, 6 %cmp1 = icmp eq i32 %type, 0 %or.cond = or i1 %cmp, %cmp1 %cmp3 = icmp eq i32 %type, 15 %or.cond1 = or i1 %cmp3, %or.cond ret i1 %or.cond1 } But more optimally lowers to: define i1 @or_icmp(i32 signext %type) { entry: %srl = lshr i32 32833, %type %srl.1 = trunc i32 %srl to i1 %cmp = icmp ult i32 %type, 64 %and = and i1 %srl.1, %cmp ret i1 %and } --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 49 ++++++ llvm/test/CodeGen/RISCV/flag_check.ll | 157 +++++++++++--------- 2 files changed, 139 insertions(+), 67 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 3b250d7d9ad1f..50076dbb4555e 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16736,6 +16736,52 @@ static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG, return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops); } +// or (icmp eq x, imm0), (icmp eq x, imm1) -> czero.eqz (sltui x, 64), (bext x, +// 1 << imm0 | 1 << imm1) If [imm0, imm1] < 64 +static SDValue combineOrOfImmCmpToBitExtract(SDNode *N, SelectionDAG 
&DAG, + const RISCVSubtarget &Subtarget) { + using namespace SDPatternMatch; + + auto CollectSetEqImmTree = [](auto &&Self, SmallVector &FlagVals, + SDNode *N, SDValue &X) -> bool { + APInt Imm; + if (X ? sd_match(N, m_OneUse(m_SetCC(m_Specific(X), m_ConstInt(Imm), + m_SpecificCondCode(ISD::SETEQ)))) + : sd_match(N, m_OneUse(m_SetCC(m_Value(X), m_ConstInt(Imm), + m_SpecificCondCode(ISD::SETEQ))))) { + FlagVals.push_back(Imm); + return true; + } + SDValue LHS, RHS; + if (sd_match(N, m_OneUse(m_Or(m_Value(LHS), m_Value(RHS))))) { + return Self(Self, FlagVals, LHS.getNode(), X) && + Self(Self, FlagVals, RHS.getNode(), X); + } + return false; + }; + + SmallVector FlagVals; + SDValue X; + if (!CollectSetEqImmTree(CollectSetEqImmTree, FlagVals, N, X)) + return SDValue(); + + unsigned XLen = Subtarget.getXLen(); + uint64_t BitMask = 0; + for (auto &Imm : FlagVals) { + if (Imm.uge(XLen)) + return SDValue(); + BitMask |= ((uint64_t)1 << Imm.getZExtValue()); + } + + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue BitExtract = + DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(BitMask, DL, VT), X); + SDValue Lt64Check = + DAG.getSetCC(DL, VT, X, DAG.getConstant(XLen, DL, VT), ISD::SETULT); + return DAG.getNode(ISD::AND, DL, VT, Lt64Check, BitExtract); +} + static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; @@ -16748,6 +16794,9 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return V; if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) return V; + if (DCI.isAfterLegalizeDAG()) + if (SDValue V = combineOrOfImmCmpToBitExtract(N, DAG, Subtarget)) + return V; if (DCI.isAfterLegalizeDAG()) if (SDValue V = combineDeMorganOfBoolean(N, DAG)) diff --git a/llvm/test/CodeGen/RISCV/flag_check.ll b/llvm/test/CodeGen/RISCV/flag_check.ll index 22df3d1859633..86049bf53379c 100644 --- a/llvm/test/CodeGen/RISCV/flag_check.ll +++ 
b/llvm/test/CodeGen/RISCV/flag_check.ll @@ -3,13 +3,21 @@ ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define i1 @or_icmp_2(i32 signext %type) { -; CHECK-LABEL: or_icmp_2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi a1, a0, -6 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: ret +; RV32-LABEL: or_icmp_2: +; RV32: # %bb.0: # %entry +; RV32-NEXT: li a1, 65 +; RV32-NEXT: srl a1, a1, a0 +; RV32-NEXT: sltiu a0, a0, 32 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: or_icmp_2: +; RV64: # %bb.0: # %entry +; RV64-NEXT: li a1, 65 +; RV64-NEXT: srl a1, a1, a0 +; RV64-NEXT: sltiu a0, a0, 64 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: ret entry: %cmp = icmp eq i32 %type, 6 %cmp1 = icmp eq i32 %type, 0 @@ -18,16 +26,23 @@ entry: } define i1 @or_icmp_3(i32 signext %type) { -; CHECK-LABEL: or_icmp_3: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi a1, a0, -6 -; CHECK-NEXT: seqz a2, a0 -; CHECK-NEXT: addi a0, a0, -15 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: or a1, a1, a2 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: ret +; RV32-LABEL: or_icmp_3: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lui a1, 8 +; RV32-NEXT: addi a1, a1, 65 +; RV32-NEXT: srl a1, a1, a0 +; RV32-NEXT: sltiu a0, a0, 32 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: or_icmp_3: +; RV64: # %bb.0: # %entry +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: addi a1, a1, 65 +; RV64-NEXT: srl a1, a1, a0 +; RV64-NEXT: sltiu a0, a0, 64 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: ret entry: %cmp = icmp eq i32 %type, 6 %cmp1 = icmp eq i32 %type, 0 @@ -38,19 +53,23 @@ entry: } define i1 @or_icmp_4_tree(i32 signext %type) { -; CHECK-LABEL: or_icmp_4_tree: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi a1, a0, -6 -; CHECK-NEXT: seqz a2, a0 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: or a1, a1, a2 -; CHECK-NEXT: addi a2, a0, -15 -; CHECK-NEXT: addi a0, a0, -22 -; CHECK-NEXT: seqz 
a2, a2 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: or a0, a2, a0 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: ret +; RV32-LABEL: or_icmp_4_tree: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lui a1, 1032 +; RV32-NEXT: addi a1, a1, 65 +; RV32-NEXT: srl a1, a1, a0 +; RV32-NEXT: sltiu a0, a0, 32 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: or_icmp_4_tree: +; RV64: # %bb.0: # %entry +; RV64-NEXT: lui a1, 1032 +; RV64-NEXT: addi a1, a1, 65 +; RV64-NEXT: srl a1, a1, a0 +; RV64-NEXT: sltiu a0, a0, 64 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: ret entry: %cmp = icmp eq i32 %type, 6 %cmp1 = icmp eq i32 %type, 0 @@ -63,28 +82,24 @@ entry: } define i1 @or_icmp_7(i32 signext %type) { -; CHECK-LABEL: or_icmp_7: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi a1, a0, -6 -; CHECK-NEXT: seqz a2, a0 -; CHECK-NEXT: addi a3, a0, -17 -; CHECK-NEXT: addi a4, a0, -3 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: or a1, a1, a2 -; CHECK-NEXT: addi a2, a0, -31 -; CHECK-NEXT: seqz a3, a3 -; CHECK-NEXT: seqz a4, a4 -; CHECK-NEXT: or a3, a4, a3 -; CHECK-NEXT: addi a4, a0, -14 -; CHECK-NEXT: seqz a2, a2 -; CHECK-NEXT: seqz a4, a4 -; CHECK-NEXT: or a2, a4, a2 -; CHECK-NEXT: addi a0, a0, -28 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: or a1, a3, a1 -; CHECK-NEXT: or a0, a0, a2 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: ret +; RV32-LABEL: or_icmp_7: +; RV32: # %bb.0: # %entry +; RV32-NEXT: lui a1, 589860 +; RV32-NEXT: addi a1, a1, 73 +; RV32-NEXT: srl a1, a1, a0 +; RV32-NEXT: sltiu a0, a0, 32 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: or_icmp_7: +; RV64: # %bb.0: # %entry +; RV64-NEXT: lui a1, 147465 +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: addi a1, a1, 73 +; RV64-NEXT: srl a1, a1, a0 +; RV64-NEXT: sltiu a0, a0, 64 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: ret entry: %cmp = icmp eq i32 %type, 6 %cmp1 = icmp eq i32 %type, 0 @@ -152,14 +167,24 @@ entry: } define i1 @or_icmp_xlen(i32 signext %type) { -; CHECK-LABEL: or_icmp_xlen: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: addi a1, a0, -6 -; CHECK-NEXT: addi a0, a0, -32 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: ret +; RV32-LABEL: or_icmp_xlen: +; RV32: # %bb.0: # %entry +; RV32-NEXT: addi a1, a0, -6 +; RV32-NEXT: addi a0, a0, -32 +; RV32-NEXT: seqz a1, a1 +; RV32-NEXT: seqz a0, a0 +; RV32-NEXT: or a0, a1, a0 +; RV32-NEXT: ret +; +; RV64-LABEL: or_icmp_xlen: +; RV64: # %bb.0: # %entry +; RV64-NEXT: li a1, 1 +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: addi a1, a1, 64 +; RV64-NEXT: srl a1, a1, a0 +; RV64-NEXT: sltiu a0, a0, 64 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: ret entry: %cmp = icmp eq i32 %type, 6 %cmp1 = icmp eq i32 %type, 32 @@ -184,13 +209,11 @@ define i1 @or_icmp_i64(i64 signext %type) { ; ; RV64-LABEL: or_icmp_i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: addi a1, a0, -6 -; RV64-NEXT: seqz a2, a0 -; RV64-NEXT: addi a0, a0, -15 -; RV64-NEXT: seqz a1, a1 -; RV64-NEXT: or a1, a1, a2 -; RV64-NEXT: seqz a0, a0 -; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: addi a1, a1, 65 +; RV64-NEXT: srl a1, a1, a0 +; RV64-NEXT: sltiu a0, a0, 64 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: ret entry: %cmp = icmp eq i64 %type, 6 From ccfc7a84816f00b396f68ae9160770d41addaca9 Mon Sep 17 00:00:00 2001 From: bababuck Date: Wed, 26 Nov 2025 14:41:12 -0800 Subject: [PATCH 3/7] [RISCV] Revert implementation in RISCV Will move to InstCombine --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 49 ---- llvm/test/CodeGen/RISCV/flag_check.ll | 241 -------------------- 2 files changed, 290 deletions(-) delete mode 100644 llvm/test/CodeGen/RISCV/flag_check.ll diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 50076dbb4555e..3b250d7d9ad1f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -16736,52 +16736,6 @@ static SDValue combineOrAndToBitfieldInsert(SDNode *N, SelectionDAG &DAG, return 
DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops); } -// or (icmp eq x, imm0), (icmp eq x, imm1) -> czero.eqz (sltui x, 64), (bext x, -// 1 << imm0 | 1 << imm1) If [imm0, imm1] < 64 -static SDValue combineOrOfImmCmpToBitExtract(SDNode *N, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { - using namespace SDPatternMatch; - - auto CollectSetEqImmTree = [](auto &&Self, SmallVector &FlagVals, - SDNode *N, SDValue &X) -> bool { - APInt Imm; - if (X ? sd_match(N, m_OneUse(m_SetCC(m_Specific(X), m_ConstInt(Imm), - m_SpecificCondCode(ISD::SETEQ)))) - : sd_match(N, m_OneUse(m_SetCC(m_Value(X), m_ConstInt(Imm), - m_SpecificCondCode(ISD::SETEQ))))) { - FlagVals.push_back(Imm); - return true; - } - SDValue LHS, RHS; - if (sd_match(N, m_OneUse(m_Or(m_Value(LHS), m_Value(RHS))))) { - return Self(Self, FlagVals, LHS.getNode(), X) && - Self(Self, FlagVals, RHS.getNode(), X); - } - return false; - }; - - SmallVector FlagVals; - SDValue X; - if (!CollectSetEqImmTree(CollectSetEqImmTree, FlagVals, N, X)) - return SDValue(); - - unsigned XLen = Subtarget.getXLen(); - uint64_t BitMask = 0; - for (auto &Imm : FlagVals) { - if (Imm.uge(XLen)) - return SDValue(); - BitMask |= ((uint64_t)1 << Imm.getZExtValue()); - } - - SDLoc DL(N); - EVT VT = N->getValueType(0); - SDValue BitExtract = - DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(BitMask, DL, VT), X); - SDValue Lt64Check = - DAG.getSetCC(DL, VT, X, DAG.getConstant(XLen, DL, VT), ISD::SETULT); - return DAG.getNode(ISD::AND, DL, VT, Lt64Check, BitExtract); -} - static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; @@ -16794,9 +16748,6 @@ static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, return V; if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) return V; - if (DCI.isAfterLegalizeDAG()) - if (SDValue V = combineOrOfImmCmpToBitExtract(N, DAG, Subtarget)) - return V; if 
(DCI.isAfterLegalizeDAG()) if (SDValue V = combineDeMorganOfBoolean(N, DAG)) diff --git a/llvm/test/CodeGen/RISCV/flag_check.ll b/llvm/test/CodeGen/RISCV/flag_check.ll deleted file mode 100644 index 86049bf53379c..0000000000000 --- a/llvm/test/CodeGen/RISCV/flag_check.ll +++ /dev/null @@ -1,241 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 - -define i1 @or_icmp_2(i32 signext %type) { -; RV32-LABEL: or_icmp_2: -; RV32: # %bb.0: # %entry -; RV32-NEXT: li a1, 65 -; RV32-NEXT: srl a1, a1, a0 -; RV32-NEXT: sltiu a0, a0, 32 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: ret -; -; RV64-LABEL: or_icmp_2: -; RV64: # %bb.0: # %entry -; RV64-NEXT: li a1, 65 -; RV64-NEXT: srl a1, a1, a0 -; RV64-NEXT: sltiu a0, a0, 64 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: ret -entry: - %cmp = icmp eq i32 %type, 6 - %cmp1 = icmp eq i32 %type, 0 - %or.cond = or i1 %cmp, %cmp1 - ret i1 %or.cond -} - -define i1 @or_icmp_3(i32 signext %type) { -; RV32-LABEL: or_icmp_3: -; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a1, 8 -; RV32-NEXT: addi a1, a1, 65 -; RV32-NEXT: srl a1, a1, a0 -; RV32-NEXT: sltiu a0, a0, 32 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: ret -; -; RV64-LABEL: or_icmp_3: -; RV64: # %bb.0: # %entry -; RV64-NEXT: lui a1, 8 -; RV64-NEXT: addi a1, a1, 65 -; RV64-NEXT: srl a1, a1, a0 -; RV64-NEXT: sltiu a0, a0, 64 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: ret -entry: - %cmp = icmp eq i32 %type, 6 - %cmp1 = icmp eq i32 %type, 0 - %or.cond = or i1 %cmp, %cmp1 - %cmp3 = icmp eq i32 %type, 15 - %or.cond1 = or i1 %cmp3, %or.cond - ret i1 %or.cond1 -} - -define i1 @or_icmp_4_tree(i32 signext %type) { -; RV32-LABEL: or_icmp_4_tree: -; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a1, 1032 -; RV32-NEXT: addi a1, a1, 65 -; RV32-NEXT: srl a1, a1, a0 
-; RV32-NEXT: sltiu a0, a0, 32 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: ret -; -; RV64-LABEL: or_icmp_4_tree: -; RV64: # %bb.0: # %entry -; RV64-NEXT: lui a1, 1032 -; RV64-NEXT: addi a1, a1, 65 -; RV64-NEXT: srl a1, a1, a0 -; RV64-NEXT: sltiu a0, a0, 64 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: ret -entry: - %cmp = icmp eq i32 %type, 6 - %cmp1 = icmp eq i32 %type, 0 - %or.cond = or i1 %cmp, %cmp1 - %cmp2 = icmp eq i32 %type, 15 - %cmp3 = icmp eq i32 %type, 22 - %or.cond1 = or i1 %cmp2, %cmp3 - %or.cond2 = or i1 %or.cond1, %or.cond - ret i1 %or.cond2 -} - -define i1 @or_icmp_7(i32 signext %type) { -; RV32-LABEL: or_icmp_7: -; RV32: # %bb.0: # %entry -; RV32-NEXT: lui a1, 589860 -; RV32-NEXT: addi a1, a1, 73 -; RV32-NEXT: srl a1, a1, a0 -; RV32-NEXT: sltiu a0, a0, 32 -; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: ret -; -; RV64-LABEL: or_icmp_7: -; RV64: # %bb.0: # %entry -; RV64-NEXT: lui a1, 147465 -; RV64-NEXT: slli a1, a1, 2 -; RV64-NEXT: addi a1, a1, 73 -; RV64-NEXT: srl a1, a1, a0 -; RV64-NEXT: sltiu a0, a0, 64 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: ret -entry: - %cmp = icmp eq i32 %type, 6 - %cmp1 = icmp eq i32 %type, 0 - %or.cond = or i1 %cmp, %cmp1 - %cmp2 = icmp eq i32 %type, 17 - %or.cond1 = or i1 %cmp2, %or.cond - %cmp3 = icmp eq i32 %type, 3 - %or.cond2 = or i1 %cmp3, %or.cond1 - %cmp4 = icmp eq i32 %type, 31 - %or.cond3 = or i1 %cmp4, %or.cond2 - %cmp5 = icmp eq i32 %type, 14 - %or.cond4 = or i1 %cmp5, %or.cond3 - %cmp6 = icmp eq i32 %type, 28 - %or.cond5 = or i1 %cmp6, %or.cond4 - ret i1 %or.cond5 -} - -define i1 @or_icmp_gte_64(i32 signext %type) { -; CHECK-LABEL: or_icmp_gte_64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi a1, a0, -6 -; CHECK-NEXT: addi a0, a0, -64 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: ret -entry: - %cmp = icmp eq i32 %type, 6 - %cmp1 = icmp eq i32 %type, 64 - %or.cond = or i1 %cmp, %cmp1 - ret i1 %or.cond -} - -define i1 @or_icmp_multiple_uses(i32 signext %type) { -; 
CHECK-LABEL: or_icmp_multiple_uses: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi a1, a0, -6 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: xor a0, a1, a0 -; CHECK-NEXT: ret -entry: - %cmp = icmp eq i32 %type, 6 - %cmp1 = icmp eq i32 %type, 0 - %or.cond = or i1 %cmp, %cmp1 - %or.cond1 = xor i1 %cmp, %or.cond - ret i1 %or.cond1 -} - - -define i1 @or_icmp_not_eq(i32 signext %type) { -; CHECK-LABEL: or_icmp_not_eq: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi a1, a0, -6 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: snez a0, a0 -; CHECK-NEXT: or a0, a1, a0 -; CHECK-NEXT: ret -entry: - %cmp = icmp eq i32 %type, 6 - %cmp1 = icmp ugt i32 %type, 0 - %or.cond = or i1 %cmp, %cmp1 - ret i1 %or.cond -} - -define i1 @or_icmp_xlen(i32 signext %type) { -; RV32-LABEL: or_icmp_xlen: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi a1, a0, -6 -; RV32-NEXT: addi a0, a0, -32 -; RV32-NEXT: seqz a1, a1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: or a0, a1, a0 -; RV32-NEXT: ret -; -; RV64-LABEL: or_icmp_xlen: -; RV64: # %bb.0: # %entry -; RV64-NEXT: li a1, 1 -; RV64-NEXT: slli a1, a1, 32 -; RV64-NEXT: addi a1, a1, 64 -; RV64-NEXT: srl a1, a1, a0 -; RV64-NEXT: sltiu a0, a0, 64 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: ret -entry: - %cmp = icmp eq i32 %type, 6 - %cmp1 = icmp eq i32 %type, 32 - %or.cond = or i1 %cmp, %cmp1 - ret i1 %or.cond -} - -define i1 @or_icmp_i64(i64 signext %type) { -; RV32-LABEL: or_icmp_i64: -; RV32: # %bb.0: # %entry -; RV32-NEXT: xori a2, a0, 6 -; RV32-NEXT: or a3, a0, a1 -; RV32-NEXT: xori a0, a0, 15 -; RV32-NEXT: or a2, a2, a1 -; RV32-NEXT: seqz a3, a3 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: seqz a1, a2 -; RV32-NEXT: or a1, a1, a3 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: ret -; -; RV64-LABEL: or_icmp_i64: -; RV64: # %bb.0: # %entry -; RV64-NEXT: lui a1, 8 -; RV64-NEXT: addi a1, a1, 65 -; RV64-NEXT: srl a1, a1, a0 -; RV64-NEXT: sltiu a0, a0, 64 -; RV64-NEXT: and a0, a0, a1 -; RV64-NEXT: 
ret -entry: - %cmp = icmp eq i64 %type, 6 - %cmp1 = icmp eq i64 %type, 0 - %or.cond = or i1 %cmp, %cmp1 - %cmp3 = icmp eq i64 %type, 15 - %or.cond1 = or i1 %cmp3, %or.cond - ret i1 %or.cond1 -} - -define i1 @or_icmp_specific(i32 signext %type, i32 signext %type1) { -; CHECK-LABEL: or_icmp_specific: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addi a0, a0, -6 -; CHECK-NEXT: addi a1, a1, -32 -; CHECK-NEXT: seqz a0, a0 -; CHECK-NEXT: seqz a1, a1 -; CHECK-NEXT: or a0, a0, a1 -; CHECK-NEXT: ret -entry: - %cmp = icmp eq i32 %type, 6 - %cmp1 = icmp eq i32 %type1, 32 - %or.cond = or i1 %cmp, %cmp1 - ret i1 %or.cond -} From 3f57238f231361d5b666e90f8a57e3d090ad88ac Mon Sep 17 00:00:00 2001 From: bababuck Date: Tue, 25 Nov 2025 19:21:05 -0800 Subject: [PATCH 4/7] [InstCombine] Add new tests for flag check optimization --- .../test/Transforms/InstCombine/flag_check.ll | 374 ++++++++++++++++++ 1 file changed, 374 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/flag_check.ll diff --git a/llvm/test/Transforms/InstCombine/flag_check.ll b/llvm/test/Transforms/InstCombine/flag_check.ll new file mode 100644 index 0000000000000..fc6c4bd71c96e --- /dev/null +++ b/llvm/test/Transforms/InstCombine/flag_check.ll @@ -0,0 +1,374 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +target datalayout = "n32:64" + +define i1 @or_icmp_2(i32 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_2( +; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TYPE]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TYPE]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: ret i1 [[OR_COND]] +; +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + ret i1 %or.cond +} + +define i1 @or_icmp_3(i32 signext noundef %type) { +; CHECK-LABEL: define i1 
@or_icmp_3( +; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TYPE]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TYPE]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TYPE]], 15 +; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] +; CHECK-NEXT: ret i1 [[OR_COND1]] +; +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp2 = icmp eq i32 %type, 15 + %or.cond1 = or i1 %cmp2, %or.cond + ret i1 %or.cond1 +} + +define i1 @or_icmp_7(i32 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_7( +; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TYPE]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TYPE]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TYPE]], 17 +; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TYPE]], 3 +; CHECK-NEXT: [[OR_COND2:%.*]] = or i1 [[CMP3]], [[OR_COND1]] +; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[TYPE]], 31 +; CHECK-NEXT: [[OR_COND3:%.*]] = or i1 [[CMP4]], [[OR_COND2]] +; CHECK-NEXT: [[CMP5:%.*]] = icmp eq i32 [[TYPE]], 14 +; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[CMP5]], [[OR_COND3]] +; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i32 [[TYPE]], 28 +; CHECK-NEXT: [[OR_COND5:%.*]] = or i1 [[CMP6]], [[OR_COND4]] +; CHECK-NEXT: ret i1 [[OR_COND5]] +; +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp2 = icmp eq i32 %type, 17 + %or.cond1 = or i1 %cmp2, %or.cond + %cmp3 = icmp eq i32 %type, 3 + %or.cond2 = or i1 %cmp3, %or.cond1 + %cmp4 = icmp eq i32 %type, 31 + %or.cond3 = or i1 %cmp4, %or.cond2 + %cmp5 = icmp eq i32 %type, 14 + %or.cond4 = or i1 %cmp5, %or.cond3 + %cmp6 = icmp eq i32 %type, 28 + %or.cond5 = or i1 %cmp6, 
%or.cond4 + ret i1 %or.cond5 +} + +; Cannot optimize since Imm > XLen +define i1 @or_icmp_gte_64(i32 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_gte_64( +; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TYPE]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TYPE]], 64 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: ret i1 [[OR_COND]] +; +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 64 + %or.cond = or i1 %cmp, %cmp1 + ret i1 %or.cond +} + +; Cannot optimize since %type has multiple uses +define i32 @or_icmp_multiple_uses(i32 signext noundef %type) { +; CHECK-LABEL: define i32 @or_icmp_multiple_uses( +; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TYPE]], 6 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TYPE]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP1]], [[CMP2]] +; CHECK-NEXT: [[CMP_EXT:%.*]] = zext i1 [[CMP1]] to i32 +; CHECK-NEXT: [[OR_COND_EXT:%.*]] = zext i1 [[OR_COND]] to i32 +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[CMP_EXT]], [[OR_COND_EXT]] +; CHECK-NEXT: ret i32 [[ADD]] +; +entry: + %cmp = icmp eq i32 %type, 6 + %cmp1 = icmp eq i32 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp.ext = zext i1 %cmp to i32 + %or.cond.ext = zext i1 %or.cond to i32 + %add = add i32 %cmp.ext, %or.cond.ext + ret i32 %add +} + +; Cannot optimize since not == comparison +define i1 @or_icmp_not_eq(i32 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_not_eq( +; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[TYPE]], -7 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[TMP0]], -5 +; CHECK-NEXT: ret i1 [[CMP1]] +; +entry: + %cmp = icmp ugt i32 %type, 6 + %cmp1 = icmp ult i32 %type, 2 + %or.cond = or i1 %cmp, %cmp1 + ret i1 %or.cond +} + +define i1 @or_icmp_i64(i64 signext noundef %type) { +; 
CHECK-LABEL: define i1 @or_icmp_i64( +; CHECK-SAME: i64 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TYPE]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TYPE]], 15 +; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] +; CHECK-NEXT: ret i1 [[OR_COND1]] +; +entry: + %cmp = icmp eq i64 %type, 6 + %cmp1 = icmp eq i64 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp2 = icmp eq i64 %type, 15 + %or.cond1 = or i1 %cmp2, %or.cond + ret i1 %or.cond1 +} + +; Cannot optimize since not the same value being compared +define i1 @or_icmp_specific(i64 signext noundef %type, i64 signext noundef %type1, i64 signext noundef %type2) { +; CHECK-LABEL: define i1 @or_icmp_specific( +; CHECK-SAME: i64 noundef signext [[TYPE:%.*]], i64 noundef signext [[TYPE1:%.*]], i64 noundef signext [[TYPE2:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TYPE]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE1]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TYPE]], 15 +; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] +; CHECK-NEXT: ret i1 [[OR_COND1]] +; +entry: + %cmp = icmp eq i64 %type, 6 + %cmp1 = icmp eq i64 %type1, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp2 = icmp eq i64 %type, 15 + %or.cond1 = or i1 %cmp2, %or.cond + ret i1 %or.cond1 +} + +; Cannot optimize since %type can be undef +define i1 @or_icmp_undef(i64 signext %type) { +; CHECK-LABEL: define i1 @or_icmp_undef( +; CHECK-SAME: i64 signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TYPE]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TYPE]], 15 +; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] +; 
CHECK-NEXT: ret i1 [[OR_COND1]] +; +entry: + %cmp = icmp eq i64 %type, 6 + %cmp1 = icmp eq i64 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp2 = icmp eq i64 %type, 15 + %or.cond1 = or i1 %cmp2, %or.cond + ret i1 %or.cond1 +} + +define i1 @or_icmp_expand(i64 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_expand( +; CHECK-SAME: i64 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 65, [[TYPE]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[TYPE]], 7 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHR]] to i1 +; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP]], i1 [[TRUNC]], i1 false +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE]], 35 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[AND]], [[CMP1]] +; CHECK-NEXT: ret i1 [[OR_COND]] +; +entry: + %shr = lshr i64 65, %type + %cmp = icmp ult i64 %type, 7 + %trunc = trunc i64 %shr to i1 + %and = select i1 %cmp, i1 %trunc, i1 false + %cmp1 = icmp eq i64 %type, 35 + %or.cond = or i1 %and, %cmp1 + ret i1 %or.cond +} + +; Cannot optimize bounds check smaller than largest BitMap bit +define i1 @or_icmp_expand_small_bounds(i64 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_expand_small_bounds( +; CHECK-SAME: i64 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 65, [[TYPE]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[TYPE]], 3 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHR]] to i1 +; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP]], i1 [[TRUNC]], i1 false +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE]], 35 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[AND]], [[CMP1]] +; CHECK-NEXT: ret i1 [[OR_COND]] +; +entry: + %shr = lshr i64 65, %type + %cmp = icmp ult i64 %type, 3 + %trunc = trunc i64 %shr to i1 + %and = select i1 %cmp, i1 %trunc, i1 false + %cmp1 = icmp eq i64 %type, 35 + %or.cond = or i1 %and, %cmp1 + ret i1 %or.cond +} + +; Cannot optimize bounds check larger than XLen +define i1 @or_icmp_expand_large_bounds(i64 signext 
noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_expand_large_bounds( +; CHECK-SAME: i64 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 65, [[TYPE]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[TYPE]], 65 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHR]] to i1 +; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP]], i1 [[TRUNC]], i1 false +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE]], 35 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[AND]], [[CMP1]] +; CHECK-NEXT: ret i1 [[OR_COND]] +; +entry: + %shr = lshr i64 65, %type + %cmp = icmp ult i64 %type, 65 + %trunc = trunc i64 %shr to i1 + %and = select i1 %cmp, i1 %trunc, i1 false + %cmp1 = icmp eq i64 %type, 35 + %or.cond = or i1 %and, %cmp1 + ret i1 %or.cond +} + +define i1 @or_icmp_expand_trunc_type_shr(i128 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_expand_trunc_type_shr( +; CHECK-SAME: i128 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[TYPE]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 65, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i128 [[TYPE]], 64 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i128 [[TYPE]], 35 +; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[OR_COND]], [[CMP1]] +; CHECK-NEXT: ret i1 [[OR_COND1]] +; +entry: + %type.t = trunc i128 %type to i64 + %shr = lshr i64 65, %type.t + %cmp = icmp ult i128 %type, 64 + %trunc = trunc i64 %shr to i1 + %and = select i1 %cmp, i1 %trunc, i1 false + %cmp1 = icmp eq i128 %type, 35 + %or.cond = or i1 %and, %cmp1 + ret i1 %or.cond +} + +define i1 @or_icmp_expand_zext_cmp(i64 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_expand_zext_cmp( +; CHECK-SAME: i64 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 65, [[TYPE]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 
[[TYPE]], 64 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP1]], i1 [[TMP2]], i1 false +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE]], 35 +; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[OR_COND]], [[CMP1]] +; CHECK-NEXT: ret i1 [[OR_COND1]] +; +entry: + %type.e = zext i64 %type to i128 + %shr = lshr i64 65, %type + %cmp = icmp ult i128 %type.e, 64 + %trunc = trunc i64 %shr to i1 + %and = select i1 %cmp, i1 %trunc, i1 false + %cmp1 = icmp eq i64 %type, 35 + %or.cond = or i1 %and, %cmp1 + ret i1 %or.cond +} + +define i1 @or_icmp_i128(i128 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_i128( +; CHECK-SAME: i128 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[TYPE]], 6 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i128 [[TYPE]], 0 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i128 [[TYPE]], 15 +; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] +; CHECK-NEXT: ret i1 [[OR_COND1]] +; +entry: + %cmp = icmp eq i128 %type, 6 + %cmp1 = icmp eq i128 %type, 0 + %or.cond = or i1 %cmp, %cmp1 + %cmp2 = icmp eq i128 %type, 15 + %or.cond1 = or i1 %cmp2, %or.cond + ret i1 %or.cond1 +} + +define i1 @or_icmp_expand_128(i128 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_expand_128( +; CHECK-SAME: i128 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i128 65, [[TYPE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i128 [[TYPE]], 64 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[SHR]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i128 [[TYPE]], 35 +; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[OR_COND]], [[CMP1]] +; CHECK-NEXT: ret i1 [[OR_COND1]] +; +entry: + %shr = lshr i128 65, %type + %cmp = icmp ult i128 %type, 64 + %trunc = trunc i128 %shr to i1 + %and = select i1 %cmp, i1 %trunc, i1 false + 
%cmp1 = icmp eq i128 %type, 35 + %or.cond = or i1 %and, %cmp1 + ret i1 %or.cond +} + +; Cannot optimize BitMap lrger than XLen +define i1 @or_icmp_expand_large_bitmap(i128 signext noundef %type) { +; CHECK-LABEL: define i1 @or_icmp_expand_large_bitmap( +; CHECK-SAME: i128 noundef signext [[TYPE:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[SHR:%.*]] = lshr i128 73786976294838206465, [[TYPE]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i128 [[TYPE]], 64 +; CHECK-NEXT: [[TRUNC:%.*]] = trunc i128 [[SHR]] to i1 +; CHECK-NEXT: [[AND1:%.*]] = select i1 [[CMP]], i1 [[TRUNC]], i1 false +; CHECK-NEXT: [[AND:%.*]] = icmp eq i128 [[TYPE]], 35 +; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[AND1]], [[AND]] +; CHECK-NEXT: ret i1 [[OR_COND]] +; +entry: + %shr = lshr i128 73786976294838206465, %type + %cmp = icmp ult i128 %type, 64 + %trunc = trunc i128 %shr to i1 + %and = select i1 %cmp, i1 %trunc, i1 false + %cmp1 = icmp eq i128 %type, 35 + %or.cond = or i1 %and, %cmp1 + ret i1 %or.cond +} From 14c6bad6c17dcb6f7b09c08b2beaecd1282c498f Mon Sep 17 00:00:00 2001 From: bababuck Date: Mon, 10 Nov 2025 14:23:02 -0800 Subject: [PATCH 5/7] [InstCombine] Lower flag check pattern to use a bitmask-shift The following C-code: bool or_icmp(int type) { return type == 0 || type == 6 || type == 15; } Currently lowers to: define i1 @or_icmp(i32 signext %type) { entry: %cmp = icmp eq i32 %type, 6 %cmp1 = icmp eq i32 %type, 0 %or.cond = or i1 %cmp, %cmp1 %cmp3 = icmp eq i32 %type, 15 %or.cond1 = or i1 %cmp3, %or.cond ret i1 %or.cond1 } But more optimally lowers to: define i1 @or_icmp(i32 signext %type) { entry: %srl = lshr i32 32833, %type %srl.1 = trunc i32 %srl to i1 %cmp = icmp ult i32 %type, 64 %and = select i1 %cmd, i1 %srl.1, i1 false ret i1 %and } --- .../InstCombine/InstCombineAndOrXor.cpp | 87 +++++++++++++++++++ .../test/Transforms/InstCombine/flag_check.ll | 39 +++++---- 2 files changed, 111 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp 
b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index ba5568b00441b..eed3a021a05f9 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3119,6 +3119,89 @@ static Instruction *matchFunnelShift(Instruction &Or, InstCombinerImpl &IC) { return nullptr; } +static Value *combineOrOfImmCmpToBitExtract(Instruction &Or, + InstCombiner::BuilderTy &Builder, + const DataLayout &DL) { + + auto isICmpEqImm = [](Value *N, ConstantInt *&Imm, Value *&X) -> bool { + if (X) + return match(N, m_OneUse(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Specific(X), + m_ConstantInt(Imm)))); + + return match(N, m_OneUse(m_SpecificICmp(ICmpInst::ICMP_EQ, m_Value(X), + m_ConstantInt(Imm)))); + }; + + // %srl = lshr %bitmap, %X + // %icmp = icmp ult %X, %max_value + // %trunc = trunc %srl to i1 + // %sel = select %icmp, %trunc, false + auto CreateBitExtractSeq = [&](APInt BitMap, APInt MaxValue, + Value *X) -> Value * { + LLVMContext &Context = Or.getContext(); + + // %srl = lshr %bitmap, %X + // It is okay for the shift amount to be truncated because + // if information is lost then it is guaranteed to fail the bounds + // check and the shift result will be discarded + ConstantInt *BitMapConst = ConstantInt::get(Context, BitMap); + Value *ShiftAmt = + Builder.CreateZExtOrTrunc(X, BitMapConst->getIntegerType()); + Value *LShr = Builder.CreateLShr(BitMapConst, ShiftAmt); + + // %icmp = icmp ult %X, %max_value + // Use the type that is the larger of 'X' and the bounds integer + // so that no information is lost + Value *MaxVal = ConstantInt::get(Context, MaxValue); + if (MaxVal->getType()->getIntegerBitWidth() > + X->getType()->getIntegerBitWidth()) + X = Builder.CreateZExt(X, MaxVal->getType()); + else + MaxVal = Builder.CreateZExt(MaxVal, X->getType()); + Value *BoundsCheck = Builder.CreateICmp(ICmpInst::ICMP_ULT, X, MaxVal); + + // %trunc = trunc %srl to i1 + // Only care about the low bit + Value *ShrTrunc = 
Builder.CreateTrunc(LShr, IntegerType::get(Context, 1)); + + // %sel = select %icmp, %trunc, false + return Builder.CreateSelect(BoundsCheck, ShrTrunc, + ConstantInt::getFalse(Context)); + }; + + // Our BitMap should be able to fit into a single arch register + // otherwise the transformation won't be profitable + unsigned XLen = DL.getLargestLegalIntTypeSizeInBits(); + auto validImm = [&](APInt APImm) -> bool { + auto Imm = APImm.tryZExtValue(); + return Imm && (*Imm < XLen); + }; + + // Match (or (icmp eq X, Imm0), (icmp eq X, Imm1)) + ConstantInt *LHS, *RHS; + Value *X = nullptr; + if (isICmpEqImm(Or.getOperand(0), LHS, X) && + isICmpEqImm(Or.getOperand(1), RHS, X)) { + // The Shr will become poison when shifted by undef + if (!isGuaranteedNotToBeUndefOrPoison(X)) + return nullptr; + + APInt LHSAP = LHS->getValue(); + APInt RHSAP = RHS->getValue(); + if (!validImm(LHSAP) || !validImm(RHSAP)) + return nullptr; + LHSAP = LHSAP.zextOrTrunc(XLen); + RHSAP = RHSAP.zextOrTrunc(XLen); + + // Create the BitMap and Bounds check immediates + // +1 to bound because strictly less than + APInt BitMap = (APInt(XLen, 1) << LHSAP) | (APInt(XLen, 1) << RHSAP); + APInt Bound = RHSAP.ugt(LHSAP) ? RHSAP : LHSAP; + return CreateBitExtractSeq(BitMap, Bound + 1, X); + } + return nullptr; +} + /// Attempt to combine or(zext(x),shl(zext(y),bw/2) concat packing patterns. 
static Value *matchOrConcat(Instruction &Or, InstCombiner::BuilderTy &Builder) { assert(Or.getOpcode() == Instruction::Or && "bswap requires an 'or'"); @@ -4084,6 +4167,10 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Instruction *Funnel = matchFunnelShift(I, *this)) return Funnel; + if (Value *BitExtract = + combineOrOfImmCmpToBitExtract(I, Builder, getDataLayout())) + return replaceInstUsesWith(I, BitExtract); + if (Value *Concat = matchOrConcat(I, Builder)) return replaceInstUsesWith(I, Concat); diff --git a/llvm/test/Transforms/InstCombine/flag_check.ll b/llvm/test/Transforms/InstCombine/flag_check.ll index fc6c4bd71c96e..f8b3f53d6f002 100644 --- a/llvm/test/Transforms/InstCombine/flag_check.ll +++ b/llvm/test/Transforms/InstCombine/flag_check.ll @@ -7,9 +7,11 @@ define i1 @or_icmp_2(i32 signext noundef %type) { ; CHECK-LABEL: define i1 @or_icmp_2( ; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TYPE]], 6 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TYPE]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[TYPE]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 65, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TYPE]], 7 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false ; CHECK-NEXT: ret i1 [[OR_COND]] ; entry: @@ -23,9 +25,11 @@ define i1 @or_icmp_3(i32 signext noundef %type) { ; CHECK-LABEL: define i1 @or_icmp_3( ; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TYPE]], 6 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TYPE]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[TYPE]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 65, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TYPE]], 7 +; CHECK-NEXT: 
[[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TYPE]], 15 ; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] ; CHECK-NEXT: ret i1 [[OR_COND1]] @@ -43,9 +47,11 @@ define i1 @or_icmp_7(i32 signext noundef %type) { ; CHECK-LABEL: define i1 @or_icmp_7( ; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TYPE]], 6 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32 [[TYPE]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[TYPE]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 65, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TYPE]], 7 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TYPE]], 17 ; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] ; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TYPE]], 3 @@ -135,9 +141,10 @@ define i1 @or_icmp_i64(i64 signext noundef %type) { ; CHECK-LABEL: define i1 @or_icmp_i64( ; CHECK-SAME: i64 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[TYPE]], 6 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 65, [[TYPE]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TYPE]], 7 +; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP1]], i1 [[TMP2]], i1 false ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TYPE]], 15 ; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] ; CHECK-NEXT: ret i1 [[OR_COND1]] @@ -312,9 +319,11 @@ define i1 @or_icmp_i128(i128 signext noundef %type) { ; CHECK-LABEL: define i1 @or_icmp_i128( ; CHECK-SAME: i128 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: 
[[ENTRY:.*:]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i128 [[TYPE]], 6 -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i128 [[TYPE]], 0 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[CMP]], [[CMP1]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[TYPE]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 65, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i128 [[TYPE]], 7 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 +; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false ; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i128 [[TYPE]], 15 ; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] ; CHECK-NEXT: ret i1 [[OR_COND1]] From 4352ec0e2bd9d5ff004a62b982536d2d1f4e34e9 Mon Sep 17 00:00:00 2001 From: bababuck Date: Mon, 10 Nov 2025 14:23:02 -0800 Subject: [PATCH 6/7] [InstCombine] Expand flag check patterns The following C-code: bool or_icmp(int type) { return type == 0 || type == 6 || type == 15; } Currently lowers to: define i1 @or_icmp(i32 signext %type) { entry: %cmp = icmp eq i32 %type, 6 %cmp1 = icmp eq i32 %type, 0 %or.cond = or i1 %cmp, %cmp1 %cmp3 = icmp eq i32 %type, 15 %or.cond1 = or i1 %cmp3, %or.cond ret i1 %or.cond1 } But more optimally lowers to: define i1 @or_icmp(i32 signext %type) { entry: %srl = lshr i32 32833, %type %srl.1 = trunc i32 %srl to i1 %cmp = icmp ult i32 %type, 64 %and = select i1 %cmd, i1 %srl.1, i1 false ret i1 %and } --- .../InstCombine/InstCombineAndOrXor.cpp | 38 ++++++++++ .../test/Transforms/InstCombine/flag_check.ll | 69 +++++++------------ 2 files changed, 61 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index eed3a021a05f9..113dc971bb88d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3199,6 +3199,44 @@ static Value *combineOrOfImmCmpToBitExtract(Instruction &Or, APInt Bound = RHSAP.ugt(LHSAP) ? 
RHSAP : LHSAP; return CreateBitExtractSeq(BitMap, Bound + 1, X); } + + // Expand an already existing BitMap sequence + // Match: (or (%BitMapSeq(X)), (icmp eq X, Imm)) + ConstantInt *BitMap, *Bound, *CmpImm; + Value *Cmp; + if (match(&Or, m_OneUse(m_c_Or(m_Value(Cmp), + m_OneUse(m_Select( + m_SpecificICmp(ICmpInst::ICMP_ULT, + m_ZExtOrSelf(m_Value(X)), + m_ConstantInt(Bound)), + m_OneUse(m_Trunc(m_OneUse(m_Shr( + m_ConstantInt(BitMap), + m_ZExtOrTruncOrSelf(m_Deferred(X)))))), + m_Zero()))))) && + isICmpEqImm(Cmp, CmpImm, X)) { + if (!isGuaranteedNotToBeUndefOrPoison(X)) + return nullptr; + + APInt NewAP = CmpImm->getValue(); + APInt BitMapAP = BitMap->getValue(); + APInt BoundAP = Bound->getValue().zextOrTrunc(XLen); + // BitMap must fit in native arch register + if (!validImm(NewAP) || !DL.fitsInLegalInteger(BitMapAP.getActiveBits())) + return nullptr; + + NewAP = NewAP.zextOrTrunc(XLen); + BitMapAP = BitMapAP.zextOrTrunc(XLen); + + // Bounding immediate must be greater than the largest bit in the BitMap + // and less than XLen + if (BoundAP.ult(BitMapAP.getActiveBits()) || BoundAP.ugt(XLen)) + return nullptr; + + if (NewAP.uge(BoundAP)) + BoundAP = NewAP + 1; + BitMapAP |= (APInt(XLen, 1) << NewAP); + return CreateBitExtractSeq(BitMapAP, BoundAP, X); + } return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/flag_check.ll b/llvm/test/Transforms/InstCombine/flag_check.ll index f8b3f53d6f002..4f4dcf102aee0 100644 --- a/llvm/test/Transforms/InstCombine/flag_check.ll +++ b/llvm/test/Transforms/InstCombine/flag_check.ll @@ -26,13 +26,11 @@ define i1 @or_icmp_3(i32 signext noundef %type) { ; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[TYPE]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 65, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TYPE]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 32833, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TYPE]], 16 ; CHECK-NEXT: 
[[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TYPE]], 15 -; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] -; CHECK-NEXT: ret i1 [[OR_COND1]] +; CHECK-NEXT: ret i1 [[OR_COND]] ; entry: %cmp = icmp eq i32 %type, 6 @@ -48,21 +46,11 @@ define i1 @or_icmp_7(i32 signext noundef %type) { ; CHECK-SAME: i32 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[TYPE]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 65, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TYPE]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 2416066633, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TYPE]], 32 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[TYPE]], 17 -; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[TYPE]], 3 -; CHECK-NEXT: [[OR_COND2:%.*]] = or i1 [[CMP3]], [[OR_COND1]] -; CHECK-NEXT: [[CMP4:%.*]] = icmp eq i32 [[TYPE]], 31 -; CHECK-NEXT: [[OR_COND3:%.*]] = or i1 [[CMP4]], [[OR_COND2]] -; CHECK-NEXT: [[CMP5:%.*]] = icmp eq i32 [[TYPE]], 14 -; CHECK-NEXT: [[OR_COND4:%.*]] = or i1 [[CMP5]], [[OR_COND3]] -; CHECK-NEXT: [[CMP6:%.*]] = icmp eq i32 [[TYPE]], 28 -; CHECK-NEXT: [[OR_COND5:%.*]] = or i1 [[CMP6]], [[OR_COND4]] -; CHECK-NEXT: ret i1 [[OR_COND5]] +; CHECK-NEXT: ret i1 [[OR_COND]] ; entry: %cmp = icmp eq i32 %type, 6 @@ -141,13 +129,11 @@ define i1 @or_icmp_i64(i64 signext noundef %type) { ; CHECK-LABEL: define i1 @or_icmp_i64( ; CHECK-SAME: i64 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 65, [[TYPE]] -; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TYPE]], 7 +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 32833, [[TYPE]] +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TYPE]], 16 ; 
CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i1 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP1]], i1 [[TMP2]], i1 false -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i64 [[TYPE]], 15 -; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] -; CHECK-NEXT: ret i1 [[OR_COND1]] +; CHECK-NEXT: ret i1 [[OR_COND]] ; entry: %cmp = icmp eq i64 %type, 6 @@ -204,13 +190,11 @@ define i1 @or_icmp_expand(i64 signext noundef %type) { ; CHECK-LABEL: define i1 @or_icmp_expand( ; CHECK-SAME: i64 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i64 65, [[TYPE]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[TYPE]], 7 +; CHECK-NEXT: [[SHR:%.*]] = lshr i64 34359738433, [[TYPE]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[TYPE]], 36 ; CHECK-NEXT: [[TRUNC:%.*]] = trunc i64 [[SHR]] to i1 ; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP]], i1 [[TRUNC]], i1 false -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE]], 35 -; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[AND]], [[CMP1]] -; CHECK-NEXT: ret i1 [[OR_COND]] +; CHECK-NEXT: ret i1 [[AND]] ; entry: %shr = lshr i64 65, %type @@ -273,13 +257,11 @@ define i1 @or_icmp_expand_trunc_type_shr(i128 signext noundef %type) { ; CHECK-SAME: i128 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[TYPE]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 65, [[TMP0]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 34359738433, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i128 [[TYPE]], 64 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i128 [[TYPE]], 35 -; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[OR_COND]], [[CMP1]] -; CHECK-NEXT: ret i1 [[OR_COND1]] +; CHECK-NEXT: ret i1 [[OR_COND]] ; entry: %type.t = trunc i128 %type to i64 @@ -296,13 +278,11 @@ define i1 @or_icmp_expand_zext_cmp(i64 signext noundef %type) { ; CHECK-LABEL: define i1 @or_icmp_expand_zext_cmp( ; 
CHECK-SAME: i64 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 65, [[TYPE]] +; CHECK-NEXT: [[TMP0:%.*]] = lshr i64 34359738433, [[TYPE]] ; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TYPE]], 64 ; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i1 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP1]], i1 [[TMP2]], i1 false -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i64 [[TYPE]], 35 -; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[OR_COND]], [[CMP1]] -; CHECK-NEXT: ret i1 [[OR_COND1]] +; CHECK-NEXT: ret i1 [[OR_COND]] ; entry: %type.e = zext i64 %type to i128 @@ -320,13 +300,11 @@ define i1 @or_icmp_i128(i128 signext noundef %type) { ; CHECK-SAME: i128 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[TYPE]] to i64 -; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 65, [[TMP0]] -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i128 [[TYPE]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 32833, [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i128 [[TYPE]], 16 ; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 false -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i128 [[TYPE]], 15 -; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[CMP2]], [[OR_COND]] -; CHECK-NEXT: ret i1 [[OR_COND1]] +; CHECK-NEXT: ret i1 [[OR_COND]] ; entry: %cmp = icmp eq i128 %type, 6 @@ -341,13 +319,12 @@ define i1 @or_icmp_expand_128(i128 signext noundef %type) { ; CHECK-LABEL: define i1 @or_icmp_expand_128( ; CHECK-SAME: i128 noundef signext [[TYPE:%.*]]) { ; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[SHR:%.*]] = lshr i128 65, [[TYPE]] +; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[TYPE]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 34359738433, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i128 [[TYPE]], 64 -; CHECK-NEXT: [[TMP3:%.*]] = trunc i128 [[SHR]] to i1 +; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[TMP1]] to i1 ; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[TMP2]], i1 [[TMP3]], i1 
false -; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i128 [[TYPE]], 35 -; CHECK-NEXT: [[OR_COND1:%.*]] = or i1 [[OR_COND]], [[CMP1]] -; CHECK-NEXT: ret i1 [[OR_COND1]] +; CHECK-NEXT: ret i1 [[OR_COND]] ; entry: %shr = lshr i128 65, %type From 2090e8f6fdacd8f8fbfa45dff24320da8e4c22d9 Mon Sep 17 00:00:00 2001 From: bababuck Date: Wed, 26 Nov 2025 15:08:02 -0800 Subject: [PATCH 7/7] Remove undef in comment to pass formatter --- llvm/test/Transforms/InstCombine/flag_check.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/InstCombine/flag_check.ll b/llvm/test/Transforms/InstCombine/flag_check.ll index 4f4dcf102aee0..ccf150c311f8d 100644 --- a/llvm/test/Transforms/InstCombine/flag_check.ll +++ b/llvm/test/Transforms/InstCombine/flag_check.ll @@ -165,7 +165,7 @@ entry: ret i1 %or.cond1 } -; Cannot optimize since %type can be undef +; Cannot optimize since %type can be un-def define i1 @or_icmp_undef(i64 signext %type) { ; CHECK-LABEL: define i1 @or_icmp_undef( ; CHECK-SAME: i64 signext [[TYPE:%.*]]) {