diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 9b272c4721cbd..112599b48c33b 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -22,6 +22,7 @@
 #include "llvm/IR/PatternMatch.h"
 #include "llvm/Transforms/InstCombine/InstCombiner.h"
 #include "llvm/Transforms/Utils/Local.h"
+#include <bitset>
 
 using namespace llvm;
 using namespace PatternMatch;
@@ -50,6 +51,242 @@ static Value *getFCmpValue(unsigned Code, Value *LHS, Value *RHS,
   return Builder.CreateFCmpFMF(NewPred, LHS, RHS, FMF);
 }
 
+/// Materializes the canonical instruction sequence for an 8-bit truth table
+/// over the three variables Op0, Op1 and Op2. Only the tables 0x00, 0xFF,
+/// 0xE1, 0x60 and 0xD2 are handled; any other table returns nullptr. \p Root
+/// is currently unused.
+static Value *createLogicFromTable3Var(const std::bitset<8> &Table, Value *Op0,
+                                       Value *Op1, Value *Op2, Value *Root,
+                                       IRBuilderBase &Builder) {
+  uint8_t TruthValue = Table.to_ulong();
+  // The operands may be wider than i1 (or vectors), so constant results must
+  // be all-ones / zero of the operand type. ConstantInt::getTrue/getFalse are
+  // only valid for i1 and vectors of i1.
+  Type *Ty = Op0->getType();
+
+  Value *Result = nullptr;
+  switch (TruthValue) {
+  default:
+    return nullptr;
+  case 0x00: // Always FALSE
+    Result = Constant::getNullValue(Ty);
+    break;
+  case 0xFF: // Always TRUE
+    Result = Constant::getAllOnesValue(Ty);
+    break;
+  case 0xE1: // ~((Op1 | Op2) ^ Op0)
+  {
+    Value *Or = Builder.CreateOr(Op1, Op2);
+    Value *Xor = Builder.CreateXor(Or, Op0);
+    Result = Builder.CreateNot(Xor);
+  } break;
+  case 0x60: // Op0 & (Op1 ^ Op2)
+  {
+    Value *Xor = Builder.CreateXor(Op1, Op2);
+    Result = Builder.CreateAnd(Op0, Xor);
+  } break;
+  case 0xD2: // ((Op1 | Op2) ^ Op0) ^ Op1
+  {
+    Value *Or = Builder.CreateOr(Op1, Op2);
+    Value *Xor1 = Builder.CreateXor(Or, Op0);
+    Result = Builder.CreateXor(Xor1, Op1);
+  } break;
+  }
+
+  return Result;
+}
+
+/// Walks the bitwise-logic expression rooted at \p Root, appending every
+/// not/and/or/xor instruction found to \p Instructions (sorted in program
+/// order on success) and collecting the leaf values as variables. Leaves are
+/// arguments, non-BinaryOperator instructions and non-bitwise binary ops that
+/// are not direct operands of the root. Returns the three variables in a
+/// deterministic order, or {nullptr, nullptr, nullptr} if the match fails.
+static std::tuple<Value *, Value *, Value *>
+extractThreeVariablesAndInstructions(
+    Value *Root, SmallVectorImpl<Instruction *> &Instructions) {
+  SmallPtrSet<Value *, 4> Variables;
+  SmallPtrSet<Value *, 16> Visited;
+  SmallPtrSet<Value *, 4> RootOperands;
+  SmallVector<Value *> Worklist;
+  Worklist.push_back(Root);
+
+  // Traverse root operands to avoid treating them as leaf variables to prevent
+  // infinite cycles.
+  if (auto *RootInst = dyn_cast<Instruction>(Root))
+    for (Use &U : RootInst->operands())
+      RootOperands.insert(U.get());
+
+  while (!Worklist.empty()) {
+    Value *V = Worklist.pop_back_val();
+
+    if (!Visited.insert(V).second)
+      continue;
+
+    // Due to lack of cost-based heuristic, only traverse if it belongs to this
+    // expression tree.
+    bool ShouldTraverse = (V == Root || V->hasOneUse());
+
+    if (Value *NotV; match(V, m_Not(m_Value(NotV)))) {
+      if (auto *I = dyn_cast<Instruction>(V))
+        Instructions.push_back(I);
+      if (ShouldTraverse)
+        Worklist.push_back(NotV);
+      continue;
+    }
+    if (auto *BO = dyn_cast<BinaryOperator>(V)) {
+      if (!BO->isBitwiseLogicOp()) {
+        if (V == Root)
+          return {nullptr, nullptr, nullptr};
+        if (!RootOperands.count(V))
+          Variables.insert(V);
+        continue;
+      }
+
+      Instructions.push_back(BO);
+
+      if (ShouldTraverse) {
+        Worklist.push_back(BO->getOperand(0));
+        Worklist.push_back(BO->getOperand(1));
+      }
+    } else if ((isa<Argument>(V) || isa<Instruction>(V)) && V != Root) {
+      if (!RootOperands.count(V))
+        Variables.insert(V);
+    }
+  }
+
+  if (Variables.size() != 3)
+    return {nullptr, nullptr, nullptr};
+  // Check that all instructions (both variables and computation instructions)
+  // are in the same BB.
+  SmallVector<Value *, 3> SortedVars(Variables.begin(), Variables.end());
+  BasicBlock *FirstBB = nullptr;
+
+  auto CheckSameBB = [&FirstBB](Instruction *I) -> bool {
+    if (!FirstBB)
+      FirstBB = I->getParent();
+    else if (I->getParent() != FirstBB)
+      return false;
+    return true;
+  };
+
+  for (Value *V : SortedVars)
+    if (auto *I = dyn_cast<Instruction>(V); I && !CheckSameBB(I))
+      return {nullptr, nullptr, nullptr};
+
+  for (Instruction *I : Instructions)
+    if (!CheckSameBB(I))
+      return {nullptr, nullptr, nullptr};
+
+  // Validation that all collected instructions have operands that will be in
+  // Computed map.
+  SmallPtrSet<Value *, 16> ValidOperands(Variables.begin(), Variables.end());
+  ValidOperands.insert(Instructions.begin(), Instructions.end());
+
+  for (Instruction *I : Instructions) {
+    Value *NotV;
+    bool IsNot = match(I, m_Not(m_Value(NotV)));
+
+    if (!IsNot) {
+      for (Use &U : I->operands()) {
+        if (!ValidOperands.count(U.get()))
+          return {nullptr, nullptr, nullptr};
+      }
+    } else if (!ValidOperands.count(NotV)) {
+      // For NOT: only check the variable operand (constant -1 is handled by
+      // pattern matcher).
+      return {nullptr, nullptr, nullptr};
+    }
+  }
+
+  llvm::sort(SortedVars, [](Value *A, Value *B) {
+    if (isa<Argument>(A) != isa<Argument>(B))
+      return isa<Argument>(A);
+
+    if (isa<Argument>(A))
+      return cast<Argument>(A)->getArgNo() < cast<Argument>(B)->getArgNo();
+
+    return cast<Instruction>(A)->comesBefore(cast<Instruction>(B));
+  });
+
+  // Sort instructions (Useful until all 256 cases are added).
+  llvm::sort(Instructions,
+             [](Instruction *A, Instruction *B) { return A->comesBefore(B); });
+
+  return {SortedVars[0], SortedVars[1], SortedVars[2]};
+}
+
+/// Computes the 8-bit truth table of \p Expr over the variables \p Op0, \p Op1
+/// and \p Op2. Each variable is seeded with the bit pattern of its value over
+/// the 8 input rows, then every entry of \p Instructions is evaluated in order
+/// with the matching bitset operation. \p Instructions must be ordered so that
+/// operands are computed before their users, and \p Expr must be one of them.
+/// The result is currently always engaged (no path returns std::nullopt).
+static std::optional<std::bitset<8>>
+evaluateBooleanExpression(Value *Expr, Value *Op0, Value *Op1, Value *Op2,
+                          const SmallVector<Instruction *> &Instructions) {
+  // Bit i holds the value for input row i, where i = Op0*4 + Op1*2 + Op2:
+  // Op0: 0b11110000 (true for rows 100,101,110,111)
+  // Op1: 0b11001100 (true for rows 010,011,110,111)
+  // Op2: 0b10101010 (true for rows 001,011,101,111)
+  SmallDenseMap<Value *, std::bitset<8>> Computed;
+  Computed[Op0] = std::bitset<8>(0xF0); // 11110000
+  Computed[Op1] = std::bitset<8>(0xCC); // 11001100
+  Computed[Op2] = std::bitset<8>(0xAA); // 10101010
+
+  for (Instruction *I : Instructions) {
+    Value *NotV;
+    if (match(I, m_Not(m_Value(NotV)))) {
+      Computed[I] = ~Computed.at(NotV); // Bitwise NOT
+    } else if (auto *BO = dyn_cast<BinaryOperator>(I)) {
+      // Copy the operand tables: inserting Computed[I] may grow the map and
+      // invalidate references into it.
+      const std::bitset<8> LHS = Computed.at(BO->getOperand(0));
+      const std::bitset<8> RHS = Computed.at(BO->getOperand(1));
+      switch (BO->getOpcode()) {
+      case Instruction::And:
+        Computed[I] = LHS & RHS; // Bitwise AND
+        break;
+      case Instruction::Or:
+        Computed[I] = LHS | RHS; // Bitwise OR
+        break;
+      case Instruction::Xor:
+        Computed[I] = LHS ^ RHS; // Bitwise XOR
+        break;
+      default:
+        llvm_unreachable("Unexpected opcode in boolean expression evaluation");
+      }
+    }
+  }
+
+  return std::bitset<8>(Computed.at(Expr));
+}
+
+/// Entry point of the 3-variable boolean fold for the bitwise logic op \p Root.
+/// Bails out unless both root operands are BinaryOperators, then extracts the
+/// variables, builds the truth table and emits the canonical form (or nullptr).
+static Value *foldThreeVarBoolExpr(Instruction &Root,
+                                   InstCombiner::BuilderTy &Builder) {
+  auto &BO = cast<BinaryOperator>(Root);
+  assert(BO.isBitwiseLogicOp() && "Unexpected opcode for boolean expression");
+
+  if (!isa<BinaryOperator>(BO.getOperand(0)) ||
+      !isa<BinaryOperator>(BO.getOperand(1)))
+    return nullptr;
+
+  SmallVector<Instruction *> Instructions;
+  auto [Op0, Op1, Op2] =
+      extractThreeVariablesAndInstructions(&Root, Instructions);
+  if (!Op0 || !Op1 || !Op2)
+    return nullptr;
+
+  auto Table = evaluateBooleanExpression(&Root, Op0, Op1, Op2, Instructions);
+  if (!Table)
+    return nullptr;
+
+  return createLogicFromTable3Var(*Table, Op0, Op1, Op2, &Root, Builder);
+}
+
 /// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
 /// (V < Lo || V >= Hi). This method expects that Lo < Hi. IsSigned indicates
 /// whether to treat V, Lo, and Hi as signed or not.
@@ -2421,6 +2658,9 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
+  if (Value *Canonical = foldThreeVarBoolExpr(I, Builder))
+    return replaceInstUsesWith(I, Canonical);
+
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
   if (SimplifyDemandedInstructionBits(I))
@@ -4006,6 +4246,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
   if (Instruction *Phi = foldBinopWithPhiOperands(I))
     return Phi;
 
+  if (Value *Canonical = foldThreeVarBoolExpr(I, Builder))
+    return replaceInstUsesWith(I, Canonical);
+
   // See if we can simplify any instructions used by the instruction whose sole
   // purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I)) @@ -5156,6 +5399,9 @@ Instruction *InstCombinerImpl::visitXor(BinaryOperator &I) { if (Instruction *Phi = foldBinopWithPhiOperands(I)) return Phi; + if (Value *Canonical = foldThreeVarBoolExpr(I, Builder)) + return replaceInstUsesWith(I, Canonical); + if (Instruction *NewXor = foldXorToXor(I, Builder)) return NewXor; diff --git a/llvm/test/Transforms/InstCombine/pr97044.ll b/llvm/test/Transforms/InstCombine/pr97044.ll new file mode 100644 index 0000000000000..c8a12599c22b4 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr97044.ll @@ -0,0 +1,586 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; Tests for GitHub issue #97044 and PR-#149530 - Boolean expression canonicalization +define i32 @test0_4way_or(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test0_4way_or( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[OR13:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: ret i32 [[OR13]] +; + %not = xor i32 %z, -1 + %and = and i32 %y, %not + %and1 = and i32 %and, %x + %not2 = xor i32 %y, -1 + %and3 = and i32 %x, %not2 + %and4 = and i32 %and3, %z + %or = or i32 %and1, %and4 + %not5 = xor i32 %x, -1 + %not6 = xor i32 %y, -1 + %and7 = and i32 %not5, %not6 + %not8 = xor i32 %z, -1 + %and9 = and i32 %and7, %not8 + %or10 = or i32 %or, %and9 + %and11 = and i32 %x, %y + %and12 = and i32 %and11, %z + %or13 = or i32 %or10, %and12 + ret i32 %or13 +} +define i32 @test1_xor_pattern(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test1_xor_pattern( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: ret i32 [[XOR]] +; + %not = xor i32 %z, -1 + %and = and i32 %x, %y + %not1 = xor i32 %x, -1 + %not2 = xor i32 %y, -1 + %and3 = and i32 %not1, %not2 + %or = or i32 
%and, %and3 + %and4 = and i32 %not, %or + %and5 = and i32 %x, %y + %and6 = and i32 %x, %not2 + %or7 = or i32 %and5, %and6 + %and8 = and i32 %z, %or7 + %xor = xor i32 %and4, %and8 + ret i32 %xor +} +define i32 @test2_nested_xor(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test2_nested_xor( +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP2]], [[Y]] +; CHECK-NEXT: ret i32 [[TMP3]] +; + %and = and i32 %x, %y + %not = xor i32 %x, -1 + %not1 = xor i32 %y, -1 + %and2 = and i32 %not, %not1 + %or = or i32 %and, %and2 + %and3 = and i32 %x, %y + %not4 = xor i32 %y, -1 + %and5 = and i32 %x, %not4 + %or6 = or i32 %and3, %and5 + %xor = xor i32 %or, %or6 + %not7 = xor i32 %y, -1 + %and8 = and i32 %z, %not7 + %and9 = and i32 %xor, %and8 + %xor10 = xor i32 %or, %and9 + %xor11 = xor i32 %xor10, %y + %xor12 = xor i32 %xor11, -1 + ret i32 %xor12 +} +define i32 @test3_already_optimal(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @test3_already_optimal( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR]], [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[XOR]], -1 +; CHECK-NEXT: ret i32 [[NOT]] +; + %or = or i32 %y, %z + %xor = xor i32 %or, %x + %not = xor i32 %xor, -1 + ret i32 %not +} + +define i32 @test_add_as_leaf(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @test_add_as_leaf( +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT3:%.*]] = xor i32 [[X]], -1 +; CHECK-NEXT: [[AND4:%.*]] = and i32 [[ADD]], [[NOT3]] +; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[Y:%.*]], [[AND4]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[TMP1]], [[ADD]] +; CHECK-NEXT: ret i32 [[XOR]] +; + %add = add i32 %x, %c + %not1 = xor i32 %add, -1 + %and1 = and i32 %not1, %y + %not2 = xor i32 %y, -1 + %and2 = and i32 %add, %not2 + %or = or i32 %and1, %and2 + %and3 = and i32 %x, %y + %not3 = xor i32 %x, -1 + %and4 = and i32 %not3, %add + %xor = xor i32 
%or, %and4 + ret i32 %xor +} + +define i32 @test_sub_as_leaf(i32 %a, i32 %b, i32 %offset) { +; CHECK-LABEL: @test_sub_as_leaf( +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[B:%.*]], [[SUB]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[A]] +; CHECK-NEXT: [[RESULT:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: ret i32 [[RESULT]] +; + %sub = sub i32 %a, %offset + %not1 = xor i32 %sub, -1 + %and1 = and i32 %not1, %b + %and2 = and i32 %and1, %a + %not2 = xor i32 %b, -1 + %and3 = and i32 %a, %not2 + %and4 = and i32 %and3, %sub + %or = or i32 %and2, %and4 + %not3 = xor i32 %a, -1 + %not4 = xor i32 %b, -1 + %and5 = and i32 %not3, %not4 + %not5 = xor i32 %sub, -1 + %and6 = and i32 %and5, %not5 + %or2 = or i32 %or, %and6 + %and7 = and i32 %a, %b + %and8 = and i32 %and7, %sub + %result = or i32 %or2, %and8 + ret i32 %result +} + +; ============================== +; Negative Tests +; ============================== + +define i32 @negative_non_bitwise_add(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @negative_non_bitwise_add( +; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[ADD2:%.*]] = add i32 [[ADD1]], [[Z:%.*]] +; CHECK-NEXT: ret i32 [[ADD2]] +; + %add1 = add i32 %x, %y + %add2 = add i32 %add1, %z + ret i32 %add2 +} +define i32 @negative_two_variables(i32 %x, i32 %y) { +; CHECK-LABEL: @negative_two_variables( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[AND]], -1 +; CHECK-NEXT: ret i32 [[NOT]] +; + %and = and i32 %x, %y + %not = xor i32 %and, -1 + ret i32 %not +} +define i32 @negative_four_variables(i32 %x, i32 %y, i32 %z, i32 %w) { +; CHECK-LABEL: @negative_four_variables( +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Z:%.*]], [[W:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND1]], [[AND2]] +; CHECK-NEXT: ret i32 [[OR]] +; + %and1 = and i32 %x, %y + %and2 = and i32 %z, %w + %or = or i32 
%and1, %and2 + ret i32 %or +} +define i32 @negative_simple_expression(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @negative_simple_expression( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND]], [[Z:%.*]] +; CHECK-NEXT: ret i32 [[OR]] +; + %and = and i32 %x, %y + %or = or i32 %and, %z + ret i32 %or +} +define i32 @negative_different_basic_blocks(i32 %x, i32 %y, i32 %z, i1 %cond) { +; CHECK-LABEL: @negative_different_basic_blocks( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]] +; CHECK: if.true: +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[Z:%.*]] +; CHECK-NEXT: ret i32 [[AND2]] +; CHECK: if.false: +; CHECK-NEXT: ret i32 [[AND1]] +; +entry: + %and1 = and i32 %x, %y + br i1 %cond, label %if.true, label %if.false +if.true: + %and2 = and i32 %and1, %z + ret i32 %and2 +if.false: + ret i32 %and1 +} +define i32 @negative_two_vars_one_const(i32 %x, i32 %y) { +; CHECK-LABEL: @negative_two_vars_one_const( +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], 42 +; CHECK-NEXT: [[AND3_DEMORGAN:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[AND3_DEMORGAN]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND2]], [[AND3]] +; CHECK-NEXT: ret i32 [[OR]] +; + %and1 = and i32 %x, %y + %and2 = and i32 %and1, 42 + %not_x = xor i32 %x, -1 + %not_y = xor i32 %y, -1 + %and3 = and i32 %not_x, %not_y + %or = or i32 %and2, %and3 + ret i32 %or +} + +define i32 @negative_one_var_two_consts(i32 %x) { +; CHECK-LABEL: @negative_one_var_two_consts( +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[X:%.*]], 7 +; CHECK-NEXT: [[NOT_X:%.*]] = and i32 [[X]], 3 +; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[NOT_X]], 3 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[AND2]], [[AND3]] +; CHECK-NEXT: ret i32 [[OR]] +; + %and1 = and i32 %x, 15 + %and2 = and i32 %and1, 7 + %not_x = xor i32 %x, -1 + %and3 = 
and i32 %not_x, 3 + %or = or i32 %and2, %and3 + ret i32 %or +} + +define i32 @negative_const_pattern_match(i32 %x, i32 %y) { +; CHECK-LABEL: @negative_const_pattern_match( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], 255 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR]], [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[XOR]], -1 +; CHECK-NEXT: ret i32 [[NOT]] +; + %or = or i32 %y, 255 + %xor = xor i32 %or, %x + %not = xor i32 %xor, -1 + ret i32 %not +} + +define i32 @negative_mixed_vars_consts(i32 %x, i32 %y) { +; CHECK-LABEL: @negative_mixed_vars_consts( +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], 96 +; CHECK-NEXT: [[NOT_X:%.*]] = and i32 [[X]], 170 +; CHECK-NEXT: [[AND3:%.*]] = xor i32 [[NOT_X]], 170 +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[AND2]], [[AND3]] +; CHECK-NEXT: [[AND4:%.*]] = and i32 [[Y]], 204 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[AND4]] +; CHECK-NEXT: ret i32 [[XOR]] +; + %and1 = and i32 %x, %y + %and2 = and i32 %and1, 96 + %not_x = xor i32 %x, -1 + %and3 = and i32 %not_x, 170 + %or1 = or i32 %and2, %and3 + %and4 = and i32 %y, 204 + %xor = xor i32 %or1, %and4 + ret i32 %xor +} + +define i32 @negative_const_blocks_extraction(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @negative_const_blocks_extraction( +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[X:%.*]], 42 +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[AND1]], [[AND2]] +; CHECK-NEXT: [[AND3:%.*]] = and i32 [[X]], 24 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[OR1]], [[AND3]] +; CHECK-NEXT: ret i32 [[XOR]] +; + %and1 = and i32 %x, 42 + %and2 = and i32 %y, %z + %or1 = or i32 %and1, %and2 + %and3 = and i32 %x, 24 + %xor = xor i32 %or1, %and3 + ret i32 %xor +} + +define i32 @negative_single_use_add(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @negative_single_use_add( +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], [[Z:%.*]] +; CHECK-NEXT: ret i32 
[[AND]] +; + %add = add i32 %x, %y ; Single-use non-bitwise op + %and = and i32 %add, %z ; Only 2 variables: %add, %z (should not optimize) + ret i32 %and +} + +define i32 @negative_add_as_root_operand(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @negative_add_as_root_operand( +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[C:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], [[Y:%.*]] +; CHECK-NEXT: ret i32 [[AND]] +; + %add = add i32 %x, %c + %and = and i32 %add, %y ; Root: %add is direct operand + ret i32 %and +} + +define i32 @negative_sub_as_root_operand(i32 %a, i32 %b, i32 %offset) { +; CHECK-LABEL: @negative_sub_as_root_operand( +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[A:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[SUB]], [[B:%.*]] +; CHECK-NEXT: ret i32 [[XOR]] +; +; Similar test with SUB as root operand + %sub = sub i32 %a, %offset + %xor = xor i32 %sub, %b ; Root: %sub is direct operand + ret i32 %xor +} + +; ============================== +; Multi-use Tests +; ============================== +declare void @use(i32) +define i32 @multi_use_not(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @multi_use_not( +; CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[X:%.*]], -1 +; CHECK-NEXT: call void @use(i32 [[NOT1]]) +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[TMP1]], [[NOT1]] +; CHECK-NEXT: ret i32 [[AND2]] +; + %not1 = xor i32 %x, -1 + call void @use(i32 %not1) + %and1 = and i32 %not1, %y + %and2 = and i32 %and1, %z + ret i32 %and2 +} +define i32 @multi_use_binop(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @multi_use_binop( +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: call void @use(i32 [[AND1]]) +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[Z:%.*]] +; CHECK-NEXT: ret i32 [[AND2]] +; + %and1 = and i32 %x, %y + call void @use(i32 %and1) + %and2 = and i32 %and1, %z + ret i32 %and2 +} +define i32 @multi_use_multiple(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: @multi_use_multiple( +; 
CHECK-NEXT: [[NOT1:%.*]] = xor i32 [[X:%.*]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[Y:%.*]], [[NOT1]] +; CHECK-NEXT: call void @use(i32 [[NOT1]]) +; CHECK-NEXT: call void @use(i32 [[AND1]]) +; CHECK-NEXT: [[AND2:%.*]] = and i32 [[AND1]], [[Z:%.*]] +; CHECK-NEXT: ret i32 [[AND2]] +; + %not1 = xor i32 %x, -1 + %and1 = and i32 %not1, %y + call void @use(i32 %not1) + call void @use(i32 %and1) + %and2 = and i32 %and1, %z + ret i32 %and2 +} + +define i32 @multi_use_add_as_variable(i32 %x, i32 %y, i32 %offset) { +; CHECK-LABEL: @multi_use_add_as_variable( +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X:%.*]], [[OFFSET:%.*]] +; CHECK-NEXT: call void @use(i32 [[ADD]]) +; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[Y:%.*]], [[ADD]] +; CHECK-NEXT: [[TMP2:%.*]] = xor i32 [[TMP1]], [[X]] +; CHECK-NEXT: [[RESULT:%.*]] = xor i32 [[TMP2]], -1 +; CHECK-NEXT: ret i32 [[RESULT]] +; + %add = add i32 %x, %offset ; Multi-use non-bitwise op + call void @use(i32 %add) ; Extra use + %not1 = xor i32 %add, -1 + %and1 = and i32 %not1, %y + %and2 = and i32 %and1, %x + %not2 = xor i32 %y, -1 + %and3 = and i32 %x, %not2 + %and4 = and i32 %and3, %add + %or = or i32 %and2, %and4 + %not3 = xor i32 %x, -1 + %not4 = xor i32 %y, -1 + %and5 = and i32 %not3, %not4 + %not5 = xor i32 %add, -1 + %and6 = and i32 %and5, %not5 + %or2 = or i32 %or, %and6 + %and7 = and i32 %x, %y + %and8 = and i32 %and7, %add + %result = or i32 %or2, %and8 + ret i32 %result +} + +; ============================== +; Vector Tests +; ============================== + +define <2 x i32> @test0_4way_or_vec_splat(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +; CHECK-LABEL: @test0_4way_or_vec_splat( +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[OR13:%.*]] = xor <2 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: ret <2 x i32> [[OR13]] +; + %not = xor <2 x i32> %z, + %and = and <2 x i32> %y, %not + %and1 = and <2 x i32> %and, %x + %not2 = xor <2 x i32> 
%y, + %and3 = and <2 x i32> %x, %not2 + %and4 = and <2 x i32> %and3, %z + %or = or <2 x i32> %and1, %and4 + %not5 = xor <2 x i32> %x, + %not6 = xor <2 x i32> %y, + %and7 = and <2 x i32> %not5, %not6 + %not8 = xor <2 x i32> %z, + %and9 = and <2 x i32> %and7, %not8 + %or10 = or <2 x i32> %or, %and9 + %and11 = and <2 x i32> %x, %y + %and12 = and <2 x i32> %and11, %z + %or13 = or <2 x i32> %or10, %and12 + ret <2 x i32> %or13 +} + +define <2 x i32> @test0_4way_or_vec_splat_poison(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +; CHECK-LABEL: @test0_4way_or_vec_splat_poison( +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[OR13:%.*]] = xor <2 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: ret <2 x i32> [[OR13]] +; + %not = xor <2 x i32> %z, + %and = and <2 x i32> %y, %not + %and1 = and <2 x i32> %and, %x + %not2 = xor <2 x i32> %y, + %and3 = and <2 x i32> %x, %not2 + %and4 = and <2 x i32> %and3, %z + %or = or <2 x i32> %and1, %and4 + %not5 = xor <2 x i32> %x, + %not6 = xor <2 x i32> %y, + %and7 = and <2 x i32> %not5, %not6 + %not8 = xor <2 x i32> %z, + %and9 = and <2 x i32> %and7, %not8 + %or10 = or <2 x i32> %or, %and9 + %and11 = and <2 x i32> %x, %y + %and12 = and <2 x i32> %and11, %z + %or13 = or <2 x i32> %or10, %and12 + ret <2 x i32> %or13 +} + +define <2 x i32> @test1_xor_pattern_vec_splat(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +; CHECK-LABEL: @test1_xor_pattern_vec_splat( +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[TMP2]], splat (i32 -1) +; CHECK-NEXT: ret <2 x i32> [[XOR]] +; + %not = xor <2 x i32> %z, + %and = and <2 x i32> %x, %y + %not1 = xor <2 x i32> %x, + %not2 = xor <2 x i32> %y, + %and3 = and <2 x i32> %not1, %not2 + %or = or <2 x i32> %and, %and3 + %and4 = and <2 x i32> %not, %or + %and5 = and <2 x i32> %x, %y + %and6 = and <2 x 
i32> %x, %not2 + %or7 = or <2 x i32> %and5, %and6 + %and8 = and <2 x i32> %z, %or7 + %xor = xor <2 x i32> %and4, %and8 + ret <2 x i32> %xor +} + +define <2 x i32> @test2_nested_xor_vec_splat(<2 x i32> %x, <2 x i32> %y, <2 x i32> %z) { +; CHECK-LABEL: @test2_nested_xor_vec_splat( +; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[TMP3:%.*]] = xor <2 x i32> [[TMP2]], [[Y]] +; CHECK-NEXT: ret <2 x i32> [[TMP3]] +; + %and = and <2 x i32> %x, %y + %not = xor <2 x i32> %x, + %not1 = xor <2 x i32> %y, + %and2 = and <2 x i32> %not, %not1 + %or = or <2 x i32> %and, %and2 + %and3 = and <2 x i32> %x, %y + %not4 = xor <2 x i32> %y, + %and5 = and <2 x i32> %x, %not4 + %or6 = or <2 x i32> %and3, %and5 + %xor = xor <2 x i32> %or, %or6 + %not7 = xor <2 x i32> %y, + %and8 = and <2 x i32> %z, %not7 + %and9 = and <2 x i32> %xor, %and8 + %xor10 = xor <2 x i32> %or, %and9 + %xor11 = xor <2 x i32> %xor10, %y + %xor12 = xor <2 x i32> %xor11, + ret <2 x i32> %xor12 +} + +define <2 x i32> @test_add_as_leaf_vec_splat(<2 x i32> %x, <2 x i32> %y, <2 x i32> %c) { +; CHECK-LABEL: @test_add_as_leaf_vec_splat( +; CHECK-NEXT: [[ADD:%.*]] = add <2 x i32> [[X:%.*]], [[C:%.*]] +; CHECK-NEXT: [[NOT3:%.*]] = xor <2 x i32> [[X]], splat (i32 -1) +; CHECK-NEXT: [[AND4:%.*]] = and <2 x i32> [[ADD]], [[NOT3]] +; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i32> [[Y:%.*]], [[AND4]] +; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i32> [[TMP1]], [[ADD]] +; CHECK-NEXT: ret <2 x i32> [[XOR]] +; + %add = add <2 x i32> %x, %c + %not1 = xor <2 x i32> %add, + %and1 = and <2 x i32> %not1, %y + %not2 = xor <2 x i32> %y, + %and2 = and <2 x i32> %add, %not2 + %or = or <2 x i32> %and1, %and2 + %and3 = and <2 x i32> %x, %y + %not3 = xor <2 x i32> %x, + %and4 = and <2 x i32> %not3, %add + %xor = xor <2 x i32> %or, %and4 + ret <2 x i32> %xor +} + +; Non-splat vector tests +define <2 x i32> @test0_4way_or_vec_non_splat(<2 x i32> %x, <2 x i32> %y, <2 x 
i32> %z) { +; CHECK-LABEL: @test0_4way_or_vec_non_splat( +; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i32> [[Z:%.*]], +; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[Y:%.*]], [[NOT]] +; CHECK-NEXT: [[AND1:%.*]] = and <2 x i32> [[AND]], [[X:%.*]] +; CHECK-NEXT: [[NOT2:%.*]] = xor <2 x i32> [[Y]], +; CHECK-NEXT: [[AND3:%.*]] = and <2 x i32> [[X]], [[NOT2]] +; CHECK-NEXT: [[AND4:%.*]] = and <2 x i32> [[AND3]], [[Z]] +; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[AND1]], [[AND4]] +; CHECK-NEXT: [[NOT5:%.*]] = xor <2 x i32> [[X]], +; CHECK-NEXT: [[NOT6:%.*]] = xor <2 x i32> [[Y]], +; CHECK-NEXT: [[AND7:%.*]] = and <2 x i32> [[NOT5]], [[NOT6]] +; CHECK-NEXT: [[NOT8:%.*]] = xor <2 x i32> [[Z]], +; CHECK-NEXT: [[AND9:%.*]] = and <2 x i32> [[AND7]], [[NOT8]] +; CHECK-NEXT: [[OR10:%.*]] = or <2 x i32> [[OR]], [[AND9]] +; CHECK-NEXT: [[AND11:%.*]] = and <2 x i32> [[X]], [[Y]] +; CHECK-NEXT: [[AND12:%.*]] = and <2 x i32> [[AND11]], [[Z]] +; CHECK-NEXT: [[OR13:%.*]] = or <2 x i32> [[OR10]], [[AND12]] +; CHECK-NEXT: ret <2 x i32> [[OR13]] +; + %not = xor <2 x i32> %z, + %and = and <2 x i32> %y, %not + %and1 = and <2 x i32> %and, %x + %not2 = xor <2 x i32> %y, + %and3 = and <2 x i32> %x, %not2 + %and4 = and <2 x i32> %and3, %z + %or = or <2 x i32> %and1, %and4 + %not5 = xor <2 x i32> %x, + %not6 = xor <2 x i32> %y, + %and7 = and <2 x i32> %not5, %not6 + %not8 = xor <2 x i32> %z, + %and9 = and <2 x i32> %and7, %not8 + %or10 = or <2 x i32> %or, %and9 + %and11 = and <2 x i32> %x, %y + %and12 = and <2 x i32> %and11, %z + %or13 = or <2 x i32> %or10, %and12 + ret <2 x i32> %or13 +} + +define <4 x i8> @test1_xor_pattern_vec_i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %z) { +; CHECK-LABEL: @test1_xor_pattern_vec_i8( +; CHECK-NEXT: [[TMP1:%.*]] = or <4 x i8> [[Y:%.*]], [[Z:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i8> [[TMP1]], [[X:%.*]] +; CHECK-NEXT: [[XOR:%.*]] = xor <4 x i8> [[TMP2]], splat (i8 -1) +; CHECK-NEXT: ret <4 x i8> [[XOR]] +; + %not = xor <4 x i8> %z, + %and = and <4 x i8> %x, %y + %not1 = 
xor <4 x i8> %x, + %not2 = xor <4 x i8> %y, + %and3 = and <4 x i8> %not1, %not2 + %or = or <4 x i8> %and, %and3 + %and4 = and <4 x i8> %not, %or + %and5 = and <4 x i8> %x, %y + %and6 = and <4 x i8> %x, %not2 + %or7 = or <4 x i8> %and5, %and6 + %and8 = and <4 x i8> %z, %or7 + %xor = xor <4 x i8> %and4, %and8 + ret <4 x i8> %xor +}