diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index de88db2227979..e57ca7a31dce2 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50002,6 +50002,28 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
          (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
 }
 
+/// Folds (and X, (or Y, ~Z)) --> (and X, ~(and ~Y, Z))
+/// This undoes the inverse fold performed in InstCombine
+static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG) {
+
+  using namespace llvm::SDPatternMatch;
+  MVT VT = N->getSimpleValueType(0);
+  SDLoc DL(N);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.hasAndNot(SDValue(N, 0)))
+    return SDValue();
+
+  SDValue X, Y, Z;
+  if (sd_match(
+          N, m_And(m_Value(X), m_OneUse(m_Or(m_Value(Y), m_Not(m_Value(Z)))))))
+    return DAG.getNode(
+        ISD::AND, DL, VT, X,
+        DAG.getNOT(DL, DAG.getNode(ISD::AND, DL, VT, DAG.getNOT(DL, Y, VT), Z),
+                   VT));
+
+  return SDValue();
+}
+
 // This function recognizes cases where X86 bzhi instruction can replace and
 // 'and-load' sequence.
 // In case of loading integer value from an array of constants which is defined
@@ -50493,6 +50515,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG))
+    return R;
+
   // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
   // iff c2 is all/no bits mask - i.e. a select-with-zero mask.
   // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
new file mode 100644
index 0000000000000..1c6d2deb701af
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,BMI
+
+define i64 @foo(i64 %w, i64 %x, i64 %y, i64 %z) {
+; NOBMI-LABEL: foo:
+; NOBMI:       # %bb.0: # %Entry
+; NOBMI-NEXT:    movq %rcx, %rax
+; NOBMI-NEXT:    andq %rdx, %rsi
+; NOBMI-NEXT:    notq %rsi
+; NOBMI-NEXT:    andq %rdi, %rsi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    orq %rdx, %rax
+; NOBMI-NEXT:    andq %rsi, %rax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: foo:
+; BMI:       # %bb.0: # %Entry
+; BMI-NEXT:    andq %rdx, %rsi
+; BMI-NEXT:    andnq %rdi, %rsi, %rax
+; BMI-NEXT:    andnq %rcx, %rdx, %rcx
+; BMI-NEXT:    andnq %rax, %rcx, %rax
+; BMI-NEXT:    retq
+Entry:
+  %and1 = and i64 %y, %x
+  %xor1 = xor i64 %and1, -1
+  %and2 = and i64 %xor1, %w
+  %.not = xor i64 %z, -1
+  %or1 = or i64 %.not, %y
+  %and3 = and i64 %and2, %or1
+  ret i64 %and3
+}
+
+define <16 x i8> @fooVec(<16 x i8> %w, <16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
+; NOBMI-LABEL: fooVec:
+; NOBMI:       # %bb.0: # %Entry
+; NOBMI-NEXT:    andps %xmm2, %xmm1
+; NOBMI-NEXT:    andnps %xmm0, %xmm1
+; NOBMI-NEXT:    andnps %xmm3, %xmm2
+; NOBMI-NEXT:    andnps %xmm1, %xmm2
+; NOBMI-NEXT:    movaps %xmm2, %xmm0
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: fooVec:
+; BMI:       # %bb.0: # %Entry
+; BMI-NEXT:    vandps %xmm1, %xmm2, %xmm1
+; BMI-NEXT:    vandnps %xmm0, %xmm1, %xmm0
+; BMI-NEXT:    vandnps %xmm3, %xmm2, %xmm1
+; BMI-NEXT:    vandnps %xmm0, %xmm1, %xmm0
+; BMI-NEXT:    retq
+Entry:
+  %and1 = and <16 x i8> %y, %x
+  %xor1 = xor <16 x i8> %and1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %and2 = and <16 x i8> %xor1, %w
+  %.not = xor <16 x i8> %z, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %or1 = or <16 x i8> %.not, %y
+  %and3 = and <16 x i8> %and2, %or1
+  ret <16 x i8> %and3
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
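
For reviewers: the fold is a direct application of De Morgan's law, ~(~Y & Z) == (Y | ~Z), so the rewritten form is bit-for-bit identical to the original. The point of re-inverting InstCombine's canonical shape is that both NOTs now feed an AND, which TLI.hasAndNot targets lower to a single and-not (andnq with BMI, andnps/vandnps for the vector case), as the CHECK lines above show. Below is a standalone sanity check of the identity; it is not part of the patch and uses no LLVM API, just plain C++ over all 8-bit values (the identity is bitwise, so it extends to i32/i64 and vectors):

  // Exhaustively verify (X & (Y | ~Z)) == (X & ~(~Y & Z)) for 8-bit values.
  #include <cassert>

  int main() {
    for (unsigned X = 0; X < 256; ++X)
      for (unsigned Y = 0; Y < 256; ++Y)
        for (unsigned Z = 0; Z < 256; ++Z)
          // Truncate to 8 bits before comparing; ~ promotes to int.
          assert((unsigned char)(X & (Y | ~Z)) ==
                 (unsigned char)(X & ~(~Y & Z)));
    return 0;
  }

If the lowering changes, the CHECK lines in pr108731.ll can be regenerated with llvm/utils/update_llc_test_checks.py, per the NOTE on the first line of the test.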