-
Notifications
You must be signed in to change notification settings - Fork 15k
[InstCombine] Canonicalize complex boolean expressions into ~((y | z) ^ x) via 3-input truth table #149530
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
|
@llvm/pr-subscribers-llvm-transforms Author: (yafet-a) Changes: Fixes #97044. Optimizations added: Results:
Full diff: https://github.com/llvm/llvm-project/pull/149530.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3beda6bc5ba38..5d9bee7b30fad 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2139,6 +2139,44 @@ static Instruction *foldComplexAndOrPatterns(BinaryOperator &I,
X);
}
+ // ((~Z) & ((X & Y) | (~X & ~Y))) ^ (Z & ((X & Y) | (X & ~Y))) -> ~((Y | Z) ^ X)
+ {
+ {
+ Value *X, *Y, *Z;
+ Value *SomethingOrZ, *ZAndX;
+
+ if (match(&I, m_c_Xor(m_Value(SomethingOrZ), m_Value(ZAndX))) &&
+ match(ZAndX, m_And(m_Value(Z), m_Value(X))) &&
+ match(SomethingOrZ, m_Or(m_Value(), m_Specific(Z)))) {
+ Value *Something;
+ if (match(SomethingOrZ, m_Or(m_Value(Something), m_Specific(Z))) &&
+ match(Something, m_Xor(m_Specific(X), m_Value(Y)))) {
+ Value *YOrZ = Builder.CreateOr(Y, Z);
+ Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+ return BinaryOperator::CreateNot(YOrZXorX);
+ }
+ }
+ }
+
+ // ((X & Y) | (~X & ~Y)) ^ (Z & (((X & Y) | (~X & ~Y)) ^ ((X & Y) | (X & ~Y)))) -> ~((Y | Z) ^ X)
+ if (match(Op1, m_AllOnes())) {
+ Value *X, *Y, *Z;
+ Value *XorWithY;
+ if (match(Op0, m_Xor(m_Value(XorWithY), m_Value(Y)))) {
+ Value *ZAndNotY;
+ if (match(XorWithY, m_Xor(m_Value(X), m_Value(ZAndNotY)))) {
+ Value *NotY;
+ if (match(ZAndNotY, m_And(m_Value(Z), m_Value(NotY))) &&
+ match(NotY, m_Not(m_Specific(Y)))) {
+ Value *YOrZ = Builder.CreateOr(Y, Z);
+ Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+ return BinaryOperator::CreateNot(YOrZXorX);
+ }
+ }
+ }
+ }
+ }
+
return nullptr;
}
@@ -3780,6 +3818,18 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+
+ // ((X & Y & ~Z) | (X & ~Y & Z) | (~X & ~Y &~Z) | (X & Y &Z)) -> ~((Y | Z) ^ X)
+ {
+ Value *X, *Y, *Z;
+ Value *Term1, *Term2, *XAndYAndZ;
+ if (match(&I, m_Or(m_Or(m_Value(Term1), m_Value(Term2)), m_Value(XAndYAndZ))) &&
+ match(XAndYAndZ, m_And(m_And(m_Value(X), m_Value(Y)), m_Value(Z)))) {
+ Value *YOrZ = Builder.CreateOr(Y, Z);
+ Value *YOrZXorX = Builder.CreateXor(YOrZ, X);
+ return BinaryOperator::CreateNot(YOrZXorX);
+ }
+ }
Type *Ty = I.getType();
if (Ty->isIntOrIntVectorTy(1)) {
if (auto *SI0 = dyn_cast<SelectInst>(Op0)) {
diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll
new file mode 100644
index 0000000000000..4e45f88956d89
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/pr97044.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+; Tests for GitHub issue #97044 - Boolean instruction canonicalization
+; All expressions should optimise to the same canonical form: ~((y | z) ^ x)
+
+define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test0_4way_or(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], -1
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %not = xor i32 %z, -1
+ %and = and i32 %y, %not
+ %and1 = and i32 %and, %x
+ %not2 = xor i32 %y, -1
+ %and3 = and i32 %x, %not2
+ %and4 = and i32 %and3, %z
+ %or = or i32 %and1, %and4
+ %not5 = xor i32 %x, -1
+ %not6 = xor i32 %y, -1
+ %and7 = and i32 %not5, %not6
+ %not8 = xor i32 %z, -1
+ %and9 = and i32 %and7, %not8
+ %or10 = or i32 %or, %and9
+ %and11 = and i32 %x, %y
+ %and12 = and i32 %and11, %z
+ %or13 = or i32 %or10, %and12
+ ret i32 %or13
+}
+
+define i32 @test1_xor_pattern(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test1_xor_pattern(
+; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[AND4_DEMORGAN:%.*]] = or i32 [[TMP2]], [[Z:%.*]]
+; CHECK-NEXT: [[AND8:%.*]] = and i32 [[Z]], [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[AND4_DEMORGAN]], -1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[AND8]], [[TMP4]]
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %not = xor i32 %z, -1
+ %and = and i32 %x, %y
+ %not1 = xor i32 %x, -1
+ %not2 = xor i32 %y, -1
+ %and3 = and i32 %not1, %not2
+ %or = or i32 %and, %and3
+ %and4 = and i32 %not, %or
+ %and5 = and i32 %x, %y
+ %and6 = and i32 %x, %not2
+ %or7 = or i32 %and5, %and6
+ %and8 = and i32 %z, %or7
+ %xor = xor i32 %and4, %and8
+ ret i32 %xor
+}
+
+define i32 @test2_nested_xor(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test2_nested_xor(
+; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2:%.*]], -1
+; CHECK-NEXT: [[AND8:%.*]] = and i32 [[Z:%.*]], [[TMP3]]
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[X:%.*]], [[AND8]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %and = and i32 %x, %y
+ %not = xor i32 %x, -1
+ %not1 = xor i32 %y, -1
+ %and2 = and i32 %not, %not1
+ %or = or i32 %and, %and2
+ %and3 = and i32 %x, %y
+ %not4 = xor i32 %y, -1
+ %and5 = and i32 %x, %not4
+ %or6 = or i32 %and3, %and5
+ %xor = xor i32 %or, %or6
+ %not7 = xor i32 %y, -1
+ %and8 = and i32 %z, %not7
+ %and9 = and i32 %xor, %and8
+ %xor10 = xor i32 %or, %and9
+ %xor11 = xor i32 %xor10, %y
+ %xor12 = xor i32 %xor11, -1
+ ret i32 %xor12
+}
+
+define i32 @test3_already_optimal(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test3_already_optimal(
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]]
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR]], [[X:%.*]]
+; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[XOR]], -1
+; CHECK-NEXT: ret i32 [[NOT]]
+;
+ %or = or i32 %y, %z
+ %xor = xor i32 %or, %x
+ %not = xor i32 %xor, -1
+ ret i32 %not
+}
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
7c960cb to
eb53480
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We don't accept complex patterns without real-world usefulness: https://llvm.org/docs/InstCombineContributorGuide.html#real-world-usefulness
Currently, the original case can be simplified into a smaller expression: https://godbolt.org/z/En3za8Gjz. Please check if we can fold it into the target pattern through a simpler transformation (2-3 instructions). Then you need to demonstrate its real-world usefulness.
Reducing the size of an expression is not the business of optimizing compilers. Instead, it is widely used by decompilers. See also https://docs.hex-rays.com/user-guide/decompiler/goomba.
60f76f9 to
9aef3f0
Compare
9aef3f0 to
a95e12b
Compare
a95e12b to
db43e0a
Compare
db43e0a to
02807e3
Compare
Pull Request: Response to Changes Requested and Discussion in #97044
Design Change: Pattern Matching → Truth Table Approach
Before:
After:
Key Changes
Testing & Validation
Future Extensibility
The framework can support all 256 possible 3-variable boolean functions. For now, it may be better to see where this goes and incrementally add new cases over time. |
|
Hi @dtcxzyw, gentle ping: I have implemented a version of the table you suggested in issue #97044, and it covers the use case described in that issue.
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Crash reproducer:
; bin/opt -passes=instcombine test.ll -S
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define void @main(<4 x i1> %broadcast.splatinsert, <4 x i1> %broadcast.splat, <4 x i1> %0, ptr %p) {
entry:
%1 = xor <4 x i1> %broadcast.splatinsert, %0
%2 = xor <4 x i1> %broadcast.splat, %broadcast.splatinsert
%3 = zext <4 x i1> %1 to <4 x i32>
%4 = zext <4 x i1> %2 to <4 x i32>
%bin.rdx = or <4 x i32> %4, %3
%5 = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> %bin.rdx)
br label %for.cond5.preheader.i.i
for.cond5.preheader.i.i: ; preds = %entry
store i32 %5, ptr %p, align 4
ret void
}
opt: /home/dtcxzyw/WorkSpace/Projects/compilers/llvm-project/llvm/lib/IR/Instruction.cpp:345: bool llvm::Instruction::comesBefore(const llvm::Instruction*) const: Assertion `getParent() == Other->getParent() && "cross-BB instruction order comparison"' failed.
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0. Program arguments: bin/opt -passes=instcombine reduced.ll -S
1. Running pass "function(instcombine<max-iterations=1;verify-fixpoint>)" on module "reduced.ll"
2. Running pass "instcombine<max-iterations=1;verify-fixpoint>" on function "main"
#0 0x00007da9aae2eb62 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.22.0git+0x22eb62)
#1 0x00007da9aae2b12f llvm::sys::RunSignalHandlers() (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMSupport.so.22.0git+0x22b12f)
#2 0x00007da9aae2b27c SignalHandler(int, siginfo_t*, void*) Signals.cpp:0:0
#3 0x00007da9aa845330 (/lib/x86_64-linux-gnu/libc.so.6+0x45330)
#4 0x00007da9aa89eb2c __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
#5 0x00007da9aa89eb2c __pthread_kill_internal ./nptl/pthread_kill.c:78:10
#6 0x00007da9aa89eb2c pthread_kill ./nptl/pthread_kill.c:89:10
#7 0x00007da9aa84527e raise ./signal/../sysdeps/posix/raise.c:27:6
#8 0x00007da9aa8288ff abort ./stdlib/abort.c:81:7
#9 0x00007da9aa82881b _nl_load_domain ./intl/loadmsgcat.c:1177:9
#10 0x00007da9aa83b517 (/lib/x86_64-linux-gnu/libc.so.6+0x3b517)
#11 0x00007da9a147ea93 (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.22.0git+0x27ea93)
#12 0x00007da9a268a638 void std::__insertion_sort<llvm::Instruction**, __gnu_cxx::__ops::_Iter_comp_iter<extractThreeVariablesAndInstructions(llvm::Value*, llvm::SmallVectorImpl<llvm::Instruction*>&)::'lambda0'(llvm::Instruction*, llvm::Instruction*)>>(llvm::Instruction**, llvm::Instruction**, __gnu_cxx::__ops::_Iter_comp_iter<extractThreeVariablesAndInstructions(llvm::Value*, llvm::SmallVectorImpl<llvm::Instruction*>&)::'lambda0'(llvm::Instruction*, llvm::Instruction*)>) (.constprop.0) InstCombineAndOrXor.cpp:0:0
#13 0x00007da9a26ab5a7 foldThreeVarBoolExpr(llvm::Instruction&, llvm::IRBuilder<llvm::TargetFolder, llvm::IRBuilderCallbackInserter>&) InstCombineAndOrXor.cpp:0:0
#14 0x00007da9a26af3c7 llvm::InstCombinerImpl::visitOr(llvm::BinaryOperator&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMInstCombine.so.22.0git+0xaf3c7)
#15 0x00007da9a2668398 llvm::InstCombinerImpl::run() (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMInstCombine.so.22.0git+0x68398)
#16 0x00007da9a2669521 combineInstructionsOverFunction(llvm::Function&, llvm::InstructionWorklist&, llvm::AAResults*, llvm::AssumptionCache&, llvm::TargetLibraryInfo&, llvm::TargetTransformInfo&, llvm::DominatorTree&, llvm::OptimizationRemarkEmitter&, llvm::BlockFrequencyInfo*, llvm::BranchProbabilityInfo*, llvm::ProfileSummaryInfo*, llvm::InstCombineOptions const&) InstructionCombining.cpp:0:0
#17 0x00007da9a266a564 llvm::InstCombinePass::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMInstCombine.so.22.0git+0x6a564)
#18 0x00007da9a4daa0e5 llvm::detail::PassModel<llvm::Function, llvm::InstCombinePass, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libPolly.so.22.0git+0x1aa0e5)
#19 0x00007da9a1522069 llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.22.0git+0x322069)
#20 0x00007da9a98dd6c5 llvm::detail::PassModel<llvm::Function, llvm::PassManager<llvm::Function, llvm::AnalysisManager<llvm::Function>>, llvm::AnalysisManager<llvm::Function>>::run(llvm::Function&, llvm::AnalysisManager<llvm::Function>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMX86CodeGen.so.22.0git+0xdd6c5)
#21 0x00007da9a1522582 llvm::ModuleToFunctionPassAdaptor::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.22.0git+0x322582)
#22 0x00007da9ab091585 llvm::detail::PassModel<llvm::Module, llvm::ModuleToFunctionPassAdaptor, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.22.0git+0x20585)
#23 0x00007da9a15238ad llvm::PassManager<llvm::Module, llvm::AnalysisManager<llvm::Module>>::run(llvm::Module&, llvm::AnalysisManager<llvm::Module>&) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/../lib/libLLVMCore.so.22.0git+0x3238ad)
#24 0x00007da9ab09e86e llvm::runPassPipeline(llvm::StringRef, llvm::Module&, llvm::TargetMachine*, llvm::TargetLibraryInfoImpl*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::ToolOutputFile*, llvm::StringRef, llvm::ArrayRef<llvm::PassPlugin>, llvm::ArrayRef<std::function<void (llvm::PassBuilder&)>>, llvm::opt_tool::OutputKind, llvm::opt_tool::VerifierKind, bool, bool, bool, bool, bool, bool, bool, bool) (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.22.0git+0x2d86e)
#25 0x00007da9ab0a9a6a optMain (/home/dtcxzyw/WorkSpace/Projects/compilers/LLVM/llvm-build/bin/../lib/libLLVMOptDriver.so.22.0git+0x38a6a)
#26 0x00007da9aa82a1ca __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:74:3
#27 0x00007da9aa82a28b call_init ./csu/../csu/libc-start.c:128:20
#28 0x00007da9aa82a28b __libc_start_main ./csu/../csu/libc-start.c:347:5
#29 0x0000605378783095 _start (bin/opt+0x1095)
Aborted (core dumped)
Thanks for this. Since we are now extracting both variables and instructions, I need to ensure that not just the variables but also the computation instructions are in the same BB before using comesBefore. |
|
Infinite loop reproducer: |
Thanks! This was definitely a byproduct of the change to treat non-bitwise logic ops as leaf nodes. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't have further comments. Please wait for additional approval from other reviewers.
|
Some nit comments but largely LGTM. |
|
@nikic are you happy with the changes? |
|
Hi @nikic, just following up on this PR. Are you free to take a look when you get a moment?
|
Hi @nikic, I'm taking over this MR since Yafet has finished his internship. Could you please take a look? |
|
@nikic Sorry for pinging you again, could you please check this when you have time?
|
@nikic gentle ping again |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry for the delay. My main question is about the SortedVars.
| default: | ||
| return nullptr; | ||
| case 0x00: // Always FALSE | ||
| Result = FoldConstant(false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| Result = FoldConstant(false); | |
| Result = ConstantInt::getFalse(Op0->getType()); |
I don't think we need a helper function for this... (also for true below).
| static Value *foldThreeVarBoolExpr(Instruction &Root, | ||
| InstCombiner::BuilderTy &Builder) { | ||
|
|
||
| auto &BO = cast<BinaryOperator>(Root); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Directly accept BinaryOperator as the argument? (It will downcast to Instruction where needed.)
| /// combination. | ||
| static std::optional<std::bitset<8>> | ||
| evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2, | ||
| const SmallVector<Instruction *> &Instructions) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| const SmallVector<Instruction *> &Instructions) { | |
| ArrayRef<Instruction *> Instructions) { |
| default: | ||
| llvm_unreachable("Unexpected opcode in boolean expression evaluation"); | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What about the case where it's neither Not nor BinaryOperator? I assume it can't happen, in which case we should make that dyn_cast above a cast.
| // Traverse root operands to avoid treating them as leaf variables to prevent | ||
| // infinite cycles. | ||
| if (auto *RootInst = dyn_cast<Instruction>(Root)) | ||
| for (Use &U : RootInst->operands()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| for (Use &U : RootInst->operands()) | |
| for (Value *Op : RootInst->operands()) |
If you're not using the Use, prefer directly iterating over Value *.
| } | ||
| if (auto *BO = dyn_cast<BinaryOperator>(V)) { | ||
| if (!BO->isBitwiseLogicOp()) { | ||
| if (V == Root) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can this happen? Wouldn't this imply root is a non-bitwise op? I think this can be an assert.
| // Check that all instructions (both variables and computation instructions) | ||
| // are in the same BB. | ||
| SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end()); | ||
| BasicBlock *FirstBB = nullptr; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can simplify this by always comparing to Root->getParent() (after changing Root to an Instruction argument).
|
|
||
| for (Instruction *I : Instructions) { | ||
| Value *NotV; | ||
| bool IsNot = match(I, m_Not(m_Value(NotV))); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Inline IsNot, since it is used in only one place.
| // pattern matcher). | ||
| return {nullptr, nullptr, nullptr}; | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do I understand correctly that what this actually does is discard cases with constant operands, as everything else should have already been handled in the initial loop? If so, can we bail out on constant operands there already?
| return cast<Argument>(A)->getArgNo() < cast<Argument>(B)->getArgNo(); | ||
|
|
||
| return cast<Instruction>(A)->comesBefore(cast<Instruction>(B)); | ||
| }); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The reason for sorting the instructions is obvious, but I don't really understand why we need/want sorted variables.
Is this working around the fact that createLogicFromTable3Var only handles a subset of all cases and thus a different variable order will make the transform fail? If so, I don't think this is a good idea, as the result will be very fragile. E.g. if I take the first test and make this change, it's going to fail:
-define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) {
+define i32 @test0_4way_or(i32 %z, i32 %y, i32 %x) {
I think if we do any sorting for the variables it needs to happen at the level of createLogicFromTable3Var(), where we can e.g. try to swap variables to avoid having to handle all 255 cases, and only handle the non-commuted ones.
Fixes #97044
Optimizations added:
((x&y&~z) | (x&~y&z) | (~x&~y&~z) | (x&y&z)) → ~((y | z) ^ x)
((~z) & ((x&y) | (~x&~y))) ^ (z & ((x&y) | (x&~y))) → ~((y | z) ^ x)
((x&y) | (~x&~y)) ^ (z & (((x&y) | (~x&~y)) ^ ((x&y) | (x&~y)))) → ~((y | z) ^ x)
Tests were pre-committed in order to show the effect of the canonicalization.
I have also provided Alive2 Proofs here - https://alive2.llvm.org/ce/z/RwhZzS
Results :