Skip to content

Conversation

@yafet-a
Copy link
Contributor

@yafet-a yafet-a commented Jul 18, 2025

Fixes #97044

Optimizations added:
((x&y&~z) | (x&~y&z) | (~x&~y&~z) | (x&y&z))~((y | z) ^ x)
((~z) & ((x&y) | (~x&~y))) ^ (z & ((x&y) | (x&~y)))~((y | z) ^ x)
((x&y) | (~x&~y)) ^ (z & (((x&y) | (~x&~y)) ^ ((x&y) | (x&~y))))~((y | z) ^ x)

Tests Pre-Commited in order to view the effect of the canocalization.

I have also provided Alive2 Proofs here - https://alive2.llvm.org/ce/z/RwhZzS

Results :

  • All 4 test expressions now produce identical optimal assembly (2 instructions - orr, eon)
  • Significant improvement in certain cases from 10+ instructions to just the two for complex cases such as test0 (mentioned in the original issue and included in the new test added).
  • Matches/exceeds GCC's optimisation quality on these patterns where it was unable to before - https://godbolt.org/z/bKejnjo7o

@yafet-a yafet-a requested a review from nikic as a code owner July 18, 2025 15:13
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jul 18, 2025
@llvmbot
Copy link
Member

llvmbot commented Jul 18, 2025

@llvm/pr-subscribers-llvm-transforms

Author: (yafet-a)

Changes

Fixes #97044

Optimizations added:
((x&y&~z) | (x&~y&z) | (~x&~y&~z) | (x&y&z)) → ~((y | z) ^ x)
((~z) & ((x&y) | (~x&~y))) ^ (z & ((x&y) | (x&~y)))~((y | z) ^ x)
((x&y) | (~x&~y)) ^ (z & (((x&y) | (~x&~y)) ^ ((x&y) | (x&~y)))) → ~((y | z) ^ x)

Results :

  • All 4 test expressions now produce identical optimal assembly (3 instructions)
  • Significant improvement from 10+ instructions to just 2 computational instructions (orr, eon) for complex cases such as test0 (mentioned in the original issue and included in the new test added).
  • Matches/exceeds GCC's optimisation quality on these patterns where it was unable to before - https://godbolt.org/z/bKejnjo7o

Full diff: https://github.com/llvm/llvm-project/pull/149530.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (+50)
  • (added) llvm/test/Transforms/InstCombine/pr97044.ll (+94)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3beda6bc5ba38..5d9bee7b30fad 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2139,6 +2139,44 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
           X);
   }
 
+  // ((~Z) & ((X & Y) | (~X & ~Y))) ^ (Z & ((X & Y) | (X & ~Y))) -> ~((Y | Z) ^ X)
+  {
+    {
+      Value *X, *Y, *Z;
+      Value *SomethingOrZ, *ZAndX;
+      
+      if (match(&I, m_c_Xor(m_Value(SomethingOrZ), m_Value(ZAndX))) &&
+          match(ZAndX, m_And(m_Value(Z), m_Value(X))) &&
+          match(SomethingOrZ, m_Or(m_Value(), m_Specific(Z)))) {        
+        Value *Something;
+        if (match(SomethingOrZ, m_Or(m_Value(Something), m_Specific(Z))) &&
+            match(Something, m_Xor(m_Specific(X), m_Value(Y)))) {
+          Value *YOrZ = Builder.CreateOr(Y, Z);
+          Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+          return BinaryOperator::CreateNot(YOrZXorX);
+        }
+      }
+    }
+    
+    // ((X & Y) | (~X & ~Y)) ^ (Z & (((X & Y) | (~X & ~Y)) ^ ((X & Y) | (X & ~Y)))) -> ~((Y | Z) ^ X)
+    if (match(Op1, m_AllOnes())) {      
+      Value *X, *Y, *Z;
+      Value *XorWithY;
+      if (match(Op0, m_Xor(m_Value(XorWithY), m_Value(Y)))) {        
+        Value *ZAndNotY;
+        if (match(XorWithY, m_Xor(m_Value(X), m_Value(ZAndNotY)))) {          
+          Value *NotY;
+          if (match(ZAndNotY, m_And(m_Value(Z), m_Value(NotY))) &&
+              match(NotY, m_Not(m_Specific(Y)))) {            
+            Value *YOrZ = Builder.CreateOr(Y, Z);
+            Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+            return BinaryOperator::CreateNot(YOrZXorX);
+          }
+        }
+      }
+    }
+  }
+
   return nullptr;
 }
 
@@ -3780,6 +3818,18 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
     return replaceInstUsesWith(I, V);
 
   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+  
+  // ((X & Y & ~Z) | (X & ~Y & Z) | (~X & ~Y &~Z) | (X & Y &Z)) -> ~((Y | Z) ^ X)
+  {
+    Value *X, *Y, *Z;
+    Value *Term1, *Term2, *XAndYAndZ;
+    if (match(&I, m_Or(m_Or(m_Value(Term1), m_Value(Term2)), m_Value(XAndYAndZ))) &&
+        match(XAndYAndZ, m_And(m_And(m_Value(X), m_Value(Y)), m_Value(Z)))) {
+      Value *YOrZ = Builder.CreateOr(Y, Z);
+      Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+      return BinaryOperator::CreateNot(YOrZXorX);
+    }
+  }
   Type *Ty = I.getType();
   if (Ty->isIntOrIntVectorTy(1)) {
     if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
new file mode 100644
index 0000000000000..4e45f88956d89
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Tests for GitHub issue #97044 - Boolean instruction canonicalization
+; All expressions should optimise to the same canonical form: ~((y | z) ^ x)
+
+define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test0_4way_or(
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %not = xor i32 %z, -1
+  %and = and i32 %y, %not
+  %and1 = and i32 %and, %x
+  %not2 = xor i32 %y, -1
+  %and3 = and i32 %x, %not2
+  %and4 = and i32 %and3, %z
+  %or = or i32 %and1, %and4
+  %not5 = xor i32 %x, -1
+  %not6 = xor i32 %y, -1
+  %and7 = and i32 %not5, %not6
+  %not8 = xor i32 %z, -1
+  %and9 = and i32 %and7, %not8
+  %or10 = or i32 %or, %and9
+  %and11 = and i32 %x, %y
+  %and12 = and i32 %and11, %z
+  %or13 = or i32 %or10, %and12
+  ret i32 %or13
+}
+
+define i32 @test1_xor_pattern(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test1_xor_pattern(
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[AND4_DEMORGAN:%.*]] = or i32 [[TMP2]], [[Z:%.*]]
+; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[Z]], [[TMP1]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[AND4_DEMORGAN]], -1
+; CHECK-NEXT:    [[TMP3:%.*]] = or i32 [[AND8]], [[TMP4]]
+; CHECK-NEXT:    ret i32 [[TMP3]]
+;
+  %not = xor i32 %z, -1
+  %and = and i32 %x, %y
+  %not1 = xor i32 %x, -1
+  %not2 = xor i32 %y, -1
+  %and3 = and i32 %not1, %not2
+  %or = or i32 %and, %and3
+  %and4 = and i32 %not, %or
+  %and5 = and i32 %x, %y
+  %and6 = and i32 %x, %not2
+  %or7 = or i32 %and5, %and6
+  %and8 = and i32 %z, %or7
+  %xor = xor i32 %and4, %and8
+  ret i32 %xor
+}
+
+define i32 @test2_nested_xor(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test2_nested_xor(
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i32 [[TMP2:%.*]], -1
+; CHECK-NEXT:    [[AND8:%.*]] = and i32 [[Z:%.*]], [[TMP3]]
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[X:%.*]], [[AND8]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %and = and i32 %x, %y
+  %not = xor i32 %x, -1
+  %not1 = xor i32 %y, -1
+  %and2 = and i32 %not, %not1
+  %or = or i32 %and, %and2
+  %and3 = and i32 %x, %y
+  %not4 = xor i32 %y, -1
+  %and5 = and i32 %x, %not4
+  %or6 = or i32 %and3, %and5
+  %xor = xor i32 %or, %or6
+  %not7 = xor i32 %y, -1
+  %and8 = and i32 %z, %not7
+  %and9 = and i32 %xor, %and8
+  %xor10 = xor i32 %or, %and9
+  %xor11 = xor i32 %xor10, %y
+  %xor12 = xor i32 %xor11, -1
+  ret i32 %xor12
+}
+
+define i32 @test3_already_optimal(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test3_already_optimal(
+; CHECK-NEXT:    [[OR:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT:    [[XOR:%.*]] = xor i32 [[OR]], [[X:%.*]]
+; CHECK-NEXT:    [[NOT:%.*]] = xor i32 [[XOR]], -1
+; CHECK-NEXT:    ret i32 [[NOT]]
+;
+  %or = or i32 %y, %z
+  %xor = xor i32 %or, %x
+  %not = xor i32 %xor, -1
+  ret i32 %not
+}

@github-actions
Copy link

github-actions bot commented Jul 18, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@yafet-a yafet-a force-pushed the users/yafet-a/boolean-optimisation branch from 7c960cb to eb53480 Compare July 18, 2025 16:05
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't accept complex patterns without real-world usefulness: https://llvm.org/docs/InstCombineContributorGuide.html#real-world-usefulness

Currently, the original case can be simplified into a smaller expression: https://godbolt.org/z/En3za8Gjz. Please check if we can fold it into the target pattern through a simpler transformation (2-3 instructions). Then you need to demonstrate its real-world usefulness.

Reducing the size of an expression is not the business of optimizing compilers. Instead, it is widely used by decompilers. See also https://docs.hex-rays.com/user-guide/decompiler/goomba.

@yafet-a yafet-a force-pushed the users/yafet-a/boolean-optimisation branch 2 times, most recently from 60f76f9 to 9aef3f0 Compare July 21, 2025 12:45
@yafet-a yafet-a force-pushed the users/yafet-a/boolean-optimisation branch from 9aef3f0 to a95e12b Compare July 22, 2025 12:51
@yafet-a yafet-a force-pushed the users/yafet-a/boolean-optimisation branch from a95e12b to db43e0a Compare July 29, 2025 15:39
@yafet-a yafet-a force-pushed the users/yafet-a/boolean-optimisation branch from db43e0a to 02807e3 Compare July 29, 2025 15:49
@yafet-a
Copy link
Contributor Author

yafet-a commented Jul 29, 2025

Pull Request: Response to Changes Requested and Discussion in #97044

Design Change: Pattern Matching → Truth Table Approach

Before:

  • Manual pattern matching with hardcoded transformations
  • No support for 3 input table to logic

After:

  • Generalized truth table canonicalization using 3 input truth tables (similar to createLogicFromTruthTable)
  • Separates: extract variables → build truth table → canonicalize
  • Extensible for other 3 input examples (can be extended similarly to X86's simplifyTernaryLogic()

Key Changes

  • Added functions:

    • extractThreeVariables()
    • extractThreeBitTruthTable() - evaluates all 8 input combinations
    • createLogicFromTable3Var() - maps to canonical forms
    • foldThreeVarBoolExpr() - calls the above 3
  • Currently implements cases needed for issue [LLVM] Missed optimization on boolean instructions. #97044:

    • 0xE1: ~((Op1 | Op2) ^ Op0)
    • 0x60: Op0 & (Op1 ^ Op2)
    • 0xD2: ((Op1 | Op2) ^ Op0) ^ Op1

Testing & Validation

Future Extensibility

Framework can support all 256 possible 3-variable boolean functions. For now may be better to see where this goes and incrementally enable addition of new cases with time.

@yafet-a yafet-a requested a review from dtcxzyw July 29, 2025 16:23
@yafet-a
Copy link
Contributor Author

yafet-a commented Aug 5, 2025

Hi @dtcxzyw, gentle ping but I have implemented a version of the table you suggested in issue #97044 and covers/solves the use case described in that issue

I am fine to convert an expression tree with <=3 leaf nodes into a truth table, then fold it into the optimal form. We have a similar thing called createLogicFromTable in InstCombine.
...
I'd like to add a separate function like create3BitLogicFromTable
...

@yafet-a yafet-a changed the title [InstCombine] Canoncalize complex boolean expressions into ~((y | z) ^ x) [InstCombine] Canoncalize complex boolean expressions into ~((y | z) ^ x) via 3-bit truth table Aug 6, 2025
@yafet-a yafet-a changed the title [InstCombine] Canoncalize complex boolean expressions into ~((y | z) ^ x) via 3-bit truth table [InstCombine] Canoncalize complex boolean expressions into ~((y | z) ^ x) via 3-input truth table Aug 6, 2025
@nikic nikic changed the title [InstCombine] Canoncalize complex boolean expressions into ~((y | z) ^ x) via 3-input truth table [InstCombine] Canonicalize complex boolean expressions into ~((y | z) ^ x) via 3-input truth table Aug 6, 2025
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Crash reproducer:

; bin/opt -passes=instcombine test.ll -S
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define void @main(<4 x i1> %broadcast.splatinsert, <4 x i1> %broadcast.splat, <4 x i1> %0, ptr %p) {
entry:
  %1 = xor <4 x i1> %broadcast.splatinsert, %0
  %2 = xor <4 x i1> %broadcast.splat, %broadcast.splatinsert
  %3 = zext <4 x i1> %1 to <4 x i32>
  %4 = zext <4 x i1> %2 to <4 x i32>
  %bin.rdx = or <4 x i32> %4, %3
  %5 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %bin.rdx)
  br label %for.cond5.preheader.i.i

for.cond5.preheader.i.i:                          ; preds = %entry
  store i32 %5, ptr %p, align 4
  ret void
}
opt: /home/dtcxzyw/WorkSpace/Projects/compilers/llvm-project/llvm/lib/IR/Instruction.cpp:345: bool llvm::Instruction::comesBefore(const llvm::Instruction*) const: Assertion `getParent() == Other->getParent() && "cross-BB instruction order comparison"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0.      Program arguments: bin/opt -passes=instcombine reduced.ll -S
1.      Running pass "function(instcombine<max-iterations=1;verify-fixpoint>)" on module "reduced.ll"
2.      Running pass "instcombine<max-iterations=1;verify-fixpoint>" on function "main"
 #0 0x00007da9aae2eb62 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.22.0git+0x22eb62)
 #1 0x00007da9aae2b12f llvm::sys::RunSignalHandlers() (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.22.0git+0x22b12f)
 #2 0x00007da9aae2b27c SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
 #3 0x00007da9aa845330 (/lib/x86_64-linux-gnu/libc.so.6+0x45330)
 #4 0x00007da9aa89eb2c __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
 #5 0x00007da9aa89eb2c __pthread_kill_internal ./nptl/pthread_kill.c:78:10
 #6 0x00007da9aa89eb2c pthread_kill ./nptl/pthread_kill.c:89:10
 #7 0x00007da9aa84527e raise ./signal/../sysdeps/posix/raise.c:27:6
 #8 0x00007da9aa8288ff abort ./stdlib/abort.c:81:7
 #9 0x00007da9aa82881b _nl_load_domain ./intl/loadmsgcat.c:1177:9
#10 0x00007da9aa83b517 (/lib/x86_64-linux-gnu/libc.so.6+0x3b517)
#11 0x00007da9a147ea93 (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.22.0git+0x27ea93)
#12 0x00007da9a268a638 void std::__insertion_sort<llvm::Instruction**, __gnu_cxx::__ops::_Iter_comp_iter<extractThreeVariablesAndInstructions(llvm::Value*, llvm::SmallVectorImpl<llvm::Instruction*>&)::'lambda0'(llvm::Instruction*, llvm::Instruction*)>>(llvm::Instruction**, llvm::Instruction**, __gnu_cxx::__ops::_Iter_comp_iter<extractThreeVariablesAndInstructions(llvm::Value*, llvm::SmallVectorImpl<llvm::Instruction*>&)::'lambda0'(llvm::Instruction*, llvm::Instruction*)>) (.constprop.0) InstCombineAndOrXor.cpp:0:0
#13 0x00007da9a26ab5a7 foldThreeVarBoolExpr(llvm::Instruction&, llvm::IRBuilder<llvm::TargetFolder, llvm::IRBuilderCallbackInserter>&) InstCombineAndOrXor.cpp:0:0
#14 0x00007da9a26af3c7 llvm::InstCombinerImpl::visitOr(llvm::BinaryOperator&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMInstCombine.so.22.0git+0xaf3c7)
#15 0x00007da9a2668398 llvm::InstCombinerImpl::run() (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMInstCombine.so.22.0git+0x68398)
#16 0x00007da9a2669521 combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) InstructionCombining.cpp:0:0
#17 0x00007da9a266a564 llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMInstCombine.so.22.0git+0x6a564)
#18 0x00007da9a4daa0e5 llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libPolly.so.22.0git+0x1aa0e5)
#19 0x00007da9a1522069 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.22.0git+0x322069)
#20 0x00007da9a98dd6c5 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMX86CodeGen.so.22.0git+0xdd6c5)
#21 0x00007da9a1522582 llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.22.0git+0x322582)
#22 0x00007da9ab091585 llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.22.0git+0x20585)
#23 0x00007da9a15238ad llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.22.0git+0x3238ad)
#24 0x00007da9ab09e86e llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool, bool) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.22.0git+0x2d86e)
#25 0x00007da9ab0a9a6a optMain (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.22.0git+0x38a6a)
#26 0x00007da9aa82a1ca __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#27 0x00007da9aa82a28b call_init ./csu/../csu/libc-start.c:128:20
#28 0x00007da9aa82a28b __libc_start_main ./csu/../csu/libc-start.c:347:5
#29 0x0000605378783095 _start (bin/opt+0x1095)

Aborted (core dumped)

@yafet-a
Copy link
Contributor Author

yafet-a commented Aug 26, 2025

Crash reproducer:

; bin/opt -passes=instcombine test.ll -S
; bin/opt -passes=instcombine test.ll -S
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
...

Thanks for this, since we are now extracting both variables and instructions I would similarly need to ensure not just variables but the computation instructions are in the same BB before using comesBefore(). The fix for this has been implemented now in the latest commit

@dtcxzyw
Copy link
Member

dtcxzyw commented Aug 27, 2025

Infinite loop reproducer:

; bin/opt -passes=instcombine reduced.ll -S
define i32 @func_198(ptr %l_306, ptr %arrayidx287, ptr %p) {
entry:
  br label %lbl_335

lbl_335:                                          ; preds = %lbl_335, %entry
  %l_307.0 = phi i32 [ 0, %entry ], [ %inc286, %lbl_335 ]
  %inc286 = add i32 %l_307.0, 1
  %0 = load i32, ptr %arrayidx287, align 4
  %1 = load i32, ptr %l_306, align 4
  %xor288 = xor i32 %0, %1
  %and289 = and i32 %inc286, %xor288
  %conv290 = trunc i32 %and289 to i8
  store i8 %conv290, ptr %p, align 1
  br label %lbl_335
}

@yafet-a
Copy link
Contributor Author

yafet-a commented Aug 29, 2025

Infinite loop reproducer:

; bin/opt -passes=instcombine reduced.ll -S
define i32 @func_198(ptr %l_306, ptr %arrayidx287, ptr %p) {
...

Thanks! this was definitely a byproduct of the change to treat non bitwise logic ops as leaf nodes. I've added a structural similarity check in the entry function now in the latest commit. It should prevent issues like this now while allowing us to still treat them as leaf nodes. This has been replaced with a simpler check for root operands not being treated as leaf variables. It is more efficient than the similarity check, since it can bail at the variable extraction rather than post evaluation

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't have further comments. Please wait for additional approval from other reviewers.

@madhur13490
Copy link
Contributor

Some nit comments but largely LGTM.

@yafet-a
Copy link
Contributor Author

yafet-a commented Sep 1, 2025

@nikic are you happy with the changes?

@yafet-a
Copy link
Contributor Author

yafet-a commented Sep 9, 2025

Hi @nikic, just following up on this PR. Are you free to take a look when you get a moment.

@ElvinaYakubova
Copy link
Contributor

Hi @nikic, I'm taking over this MR since Yafet has finished his internship. Could you please take a look?

@ElvinaYakubova
Copy link
Contributor

@nikic Sorry for pinging you again, could you please check this when you have time

@ElvinaYakubova
Copy link
Contributor

@nikic gentle ping again

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the delay. My main question is about the SortedVars.

default:
return nullptr;
case 0x00: // Always FALSE
Result = FoldConstant(false);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Result = FoldConstant(false);
Result = ConstantInt::getFalse(Op0->getType());

I don't think we need a helper function for this... (also for true below).

static Value *foldThreeVarBoolExpr(Instruction &Root,
InstCombiner::BuilderTy &Builder) {

auto &BO = cast<BinaryOperator>(Root);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Directly accept BinaryOperator as the argument? (It will downcast to Instruction where needed.)

/// combination.
static std::optional<std::bitset<8>>
evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2,
const SmallVector<Instruction *> &Instructions) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const SmallVector<Instruction *> &Instructions) {
ArrayRef<Instruction *> Instructions) {

default:
llvm_unreachable("Unexpected opcode in boolean expression evaluation");
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about the case where it's neither Not nor BinaryOperator? I assume it can't happen, in which case we should make that dyn_cast above a cast.

// Traverse root operands to avoid treating them as leaf variables to prevent
// infinite cycles.
if (auto *RootInst = dyn_cast<Instruction>(Root))
for (Use &U : RootInst->operands())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for (Use &U : RootInst->operands())
for (Value *Op : RootInst->operands())

If you're not using the Use, prefer directly iterating over Value *.

}
if (auto *BO = dyn_cast<BinaryOperator>(V)) {
if (!BO->isBitwiseLogicOp()) {
if (V == Root)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this happen? Wouldn't this imply root is a non-bitwise op? I think this can be an assert.

// Check that all instructions (both variables and computation instructions)
// are in the same BB.
SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
BasicBlock *FirstBB = nullptr;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can simplify this by always comparing to Root->getParent() (after changing Root to an Instruction argument).


for (Instruction *I : Instructions) {
Value *NotV;
bool IsNot = match(I, m_Not(m_Value(NotV)));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Inline IsNot used in one place.

// pattern matcher).
return {nullptr, nullptr, nullptr};
}
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do I understand correctly that what this actually does is discarding cases with constant operands, as everything else should have already been handled in the initial loop? If so, can we bail out on constant operands there already?

return cast<Argument>(A)->getArgNo() < cast<Argument>(B)->getArgNo();

return cast<Instruction>(A)->comesBefore(cast<Instruction>(B));
});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The reason for sorting the instructions is obvious, but I don't really understand why we need/want sorted variables.

Is this working around the fact that createLogicFromTable3Var only handles a subset of all cases and thus different variable order will fail the transform? If so, I don't think this is a good idea, as the result will be very fragile. E.g. if I take the first test and do this change, it's going to fail:

-define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
+define i32 @test0_4way_or(i32 %z, i32 %y, i32 %x) {

I think if we do any sorting for the variables it needs to happen at the level of createLogicFromTable3Var(), where we can e.g. try to swap variables to avoid having to handle all 255 cases, and only handle the non-commuted ones.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[LLVM] Missed optimization on boolean instructions.

6 participants