diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index 62073ec125e8f..4393f6ecaa033 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -4721,9 +4721,6 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { if (!(Subtarget->hasVLX() || NVT.is512BitVector())) return false; - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - auto getFoldableLogicOp = [](SDValue Op) { // Peek through single use bitcast. if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) @@ -4740,13 +4737,47 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { return SDValue(); }; - SDValue A, FoldableOp; - if ((FoldableOp = getFoldableLogicOp(N1))) { - A = N0; - } else if ((FoldableOp = getFoldableLogicOp(N0))) { - A = N1; - } else - return false; + SDValue N0, N1, A, FoldableOp; + + // Identify and (optionally) peel an outer NOT that wraps a pure logic tree + auto tryPeelOuterNotWrappingLogic = [&](SDNode *Op) { + if (Op->getOpcode() == ISD::XOR && Op->hasOneUse() && + ISD::isBuildVectorAllOnes(Op->getOperand(1).getNode())) { + SDValue InnerOp = Op->getOperand(0); + + if (!getFoldableLogicOp(InnerOp)) + return SDValue(); + + N0 = InnerOp.getOperand(0); + N1 = InnerOp.getOperand(1); + if ((FoldableOp = getFoldableLogicOp(N1))) { + A = N0; + return InnerOp; + } + if ((FoldableOp = getFoldableLogicOp(N0))) { + A = N1; + return InnerOp; + } + } + return SDValue(); + }; + + bool PeeledOuterNot = false; + SDNode *OriN = N; + if (SDValue InnerOp = tryPeelOuterNotWrappingLogic(N)) { + PeeledOuterNot = true; + N = InnerOp.getNode(); + } else { + N0 = N->getOperand(0); + N1 = N->getOperand(1); + + if ((FoldableOp = getFoldableLogicOp(N1))) + A = N0; + else if ((FoldableOp = getFoldableLogicOp(N0))) + A = N1; + else + return false; + } SDValue B = FoldableOp.getOperand(0); SDValue C = FoldableOp.getOperand(1); @@ -4798,7 +4829,10 @@ bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { case ISD::XOR: Imm ^= TernlogMagicA; break; } - return matchVPTERNLOG(N, ParentA, ParentB, ParentC, A, B, C, Imm); + if (PeeledOuterNot) + Imm = ~Imm; + + return matchVPTERNLOG(OriN, ParentA, ParentB, ParentC, A, B, C, Imm); } /// If the high bits of an 'and' operand are known zero, try setting the diff --git a/llvm/test/CodeGen/X86/issue163738.ll b/llvm/test/CodeGen/X86/issue163738.ll new file mode 100644 index 0000000000000..61fe043a970dd --- /dev/null +++ b/llvm/test/CodeGen/X86/issue163738.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=CHECK + +define <8 x i64> @foo(<8 x i64> %a, <8 x i64> %b, <8 x i64> %c) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: +; CHECK-NEXT: vpternlogq {{.*#+}} zmm0 = ~(zmm0 | zmm2 | zmm1) +; CHECK-NEXT: retq + %and.demorgan = or <8 x i64> %b, %a + %and3.demorgan = or <8 x i64> %and.demorgan, %c + %and3 = xor <8 x i64> %and3.demorgan, splat (i64 -1) + ret <8 x i64> %and3 +}