From ee35653e45cdfec53652118e9c93f0f12e597fbb Mon Sep 17 00:00:00 2001
From: Kevin Per
Date: Thu, 16 Oct 2025 13:53:37 +0000
Subject: [PATCH 01/20] [X86]: Reassoc De Morgan rule for ANDN

---
 llvm/lib/Target/X86/X86ISelLowering.cpp       | 28 ++++++
 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll | 98 +++++++++++++++++++
 2 files changed, 126 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a0b64ff370b10..e2632d114ce0b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51651,6 +51651,31 @@ static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
   return AndN;
 }
 
+// fold (not (or A, B)) -> andn(A, not(B)) if BMI
+static SDValue
+combineReassocDemorganWithNANDWithBMI(SDNode *Xor, const SDLoc &DL,
+                                      SelectionDAG &DAG,
+                                      const X86Subtarget &Subtarget) {
+  using namespace llvm::SDPatternMatch;
+
+  EVT VT = Xor->getValueType(0);
+  // Make sure this node is a candidate for BMI instructions.
+  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+    return SDValue();
+
+  SDValue A;
+  SDValue B;
+  APInt Cst;
+  if (!(sd_match(Xor, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
+        Cst.isAllOnes()))
+    return SDValue();
+
+  auto Opcode =
+      Subtarget.is64Bit() && VT == MVT::i64 ? X86::ANDN64rr : X86::ANDN32rr;
+  auto AndN = DAG.getMachineNode(Opcode, DL, VT, A, DAG.getNOT(DL, B, VT));
+  return SDValue(AndN, 0);
+}
+
 static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
                                         SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
@@ -55150,6 +55175,9 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineReassocDemorganWithNANDWithBMI(N, DL, DAG, Subtarget))
+    return R;
+
   return combineFneg(N, DAG, DCI, Subtarget);
 }
 
diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
new file mode 100644
index 0000000000000..ea81d08cd2e6d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64
+
+define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind {
+; X86-LABEL: reassoc_demorgan_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    notl %ecx
+; X86-NEXT:    andnl %ecx, %eax, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: reassoc_demorgan_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    notl %edi
+; X64-NEXT:    andnl %edi, %esi, %eax
+; X64-NEXT:    retq
+  %temp = or i32 %b, %a
+  %res = xor i32 %temp, -1
+  ret i32 %res
+}
+
+define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind {
+; X86-LABEL: reassoc_demorgan_three_arguments_i32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andnl %eax, %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: reassoc_demorgan_three_arguments_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    orl %esi, %edi
+; X64-NEXT:    notl %edx
+; X64-NEXT:    andnl %edx, %edi, %eax
+; X64-NEXT:    retq
+  %and.demorgan = or i32 %b, %a
+  %and3.demorgan = or i32 %and.demorgan, %c
+  %and3 = xor i32 %and3.demorgan, -1
+  ret i32 %and3
+}
+
+define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind {
+; X86-LABEL: reassoc_demorgan_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    notl %edx
+; X86-NEXT:    andnl %edx, %eax, %eax
+; X86-NEXT:    notl %esi
+; X86-NEXT:    andnl %esi, %ecx, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: reassoc_demorgan_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    notq %rdi
+; X64-NEXT:    andnq %rdi, %rsi, %rax
+; X64-NEXT:    retq
+  %temp = or i64 %b, %a
+  %res = xor i64 %temp, -1
+  ret i64 %res
+}
+
+define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind {
+; X86-LABEL: reassoc_demorgan_three_arguments_i64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andnl %eax, %edx, %eax
+; X86-NEXT:    notl %ecx
+; X86-NEXT:    andnl %ecx, %esi, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: reassoc_demorgan_three_arguments_i64:
+; X64:       # %bb.0:
+; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    notq %rdx
+; X64-NEXT:    andnq %rdx, %rdi, %rax
+; X64-NEXT:    retq
+  %and.demorgan = or i64 %b, %a
+  %and3.demorgan = or i64 %and.demorgan, %c
+  %and3 = xor i64 %and3.demorgan, -1
+  ret i64 %and3
+}

From deda3383ab0015a23d521f37d44a5714def2346c Mon Sep 17 00:00:00 2001
From: Kevin Per
Date: Thu, 16 Oct 2025 18:24:40 +0000
Subject: [PATCH 02/20] [X86]: Removed obsolete code

---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 28 -------------------------
 1 file changed, 28 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e2632d114ce0b..a0b64ff370b10 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51651,31 +51651,6 @@ static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
   return AndN;
 }
 
-// fold (not (or A, B)) -> andn(A, not(B)) if BMI
-static SDValue
-combineReassocDemorganWithNANDWithBMI(SDNode *Xor, const SDLoc &DL,
-                                      SelectionDAG &DAG,
-                                      const X86Subtarget &Subtarget) {
-  using namespace llvm::SDPatternMatch;
-
-  EVT VT = Xor->getValueType(0);
-  // Make sure this node is a candidate for BMI instructions.
-  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
-    return SDValue();
-
-  SDValue A;
-  SDValue B;
-  APInt Cst;
-  if (!(sd_match(Xor, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
-        Cst.isAllOnes()))
-    return SDValue();
-
-  auto Opcode =
-      Subtarget.is64Bit() && VT == MVT::i64 ? X86::ANDN64rr : X86::ANDN32rr;
-  auto AndN = DAG.getMachineNode(Opcode, DL, VT, A, DAG.getNOT(DL, B, VT));
-  return SDValue(AndN, 0);
-}
-
 static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
                                         SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
@@ -55175,9 +55150,6 @@ static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
     return R;
 
-  if (SDValue R = combineReassocDemorganWithNANDWithBMI(N, DL, DAG, Subtarget))
-    return R;
-
   return combineFneg(N, DAG, DCI, Subtarget);
 }
 

From cdb57ef348cbd3ac3bd6b361fd6a3b4ddb9ff8fb Mon Sep 17 00:00:00 2001
From: Kevin Per
Date: Thu, 16 Oct 2025 18:25:21 +0000
Subject: [PATCH 03/20] [DAG]: Reassoc De Morgan rule for ANDN

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c97300d64d455..0629b75989233 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10197,6 +10197,22 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     }
   }
 
+  // fold (not (or A, B)) -> and(not(A), not(B))
+  if (TLI.hasAndNot(SDValue(N, 0))) {
+    // If we have AndNot then it is profitable to apply De Morgan's law to
+    // make use of the machine instruction.
+    SDValue A;
+    SDValue B;
+    APInt Cst;
+    if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
+        Cst.isAllOnes()) {
+      return DAG.getNode(
+          ISD::AND, DL, VT,
+          DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(-1, DL, VT)),
+          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(-1, DL, VT)));
+    }
+  }
+
   return SDValue();
 }
 

From 9e4103d5d49bbc92b1fdfad30e5e52f51f9c70e2 Mon Sep 17 00:00:00 2001
From: Kevin Per
Date: Thu, 16 Oct 2025 18:35:14 +0000
Subject: [PATCH 04/20] [DAG]: Fixed constant type

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 0629b75989233..5b77dc423b66b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10206,10 +10206,11 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     APInt Cst;
     if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) &&
         Cst.isAllOnes()) {
+      auto Ty = N->getValueType(0);
       return DAG.getNode(
           ISD::AND, DL, VT,
-          DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(-1, DL, VT)),
-          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(-1, DL, VT)));
+          DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty)),
+          DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty)));
     }
   }
 

From cf6ee582057a42e9ec8f5b81355c2bee8a8067cb Mon Sep 17 00:00:00 2001
From: Kevin Per
Date: Thu, 16 Oct 2025 18:45:46 +0000
Subject: [PATCH 05/20] [X86]: Updated tests

---
 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll | 210 ++++++++++++------
 1 file changed, 143 insertions(+), 67 deletions(-)

diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
index ea81d08cd2e6d..7f3a376b24b2a 100644
--- a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
+++ b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
@@ -1,42 +1,75 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s 
--check-prefix=X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86-WITH-BMI +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64-WITH-BMI +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-WITHOUT-BMI +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-WITHOUT-BMI define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind { -; X86-LABEL: reassoc_demorgan_i32: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: notl %ecx -; X86-NEXT: andnl %ecx, %eax, %eax -; X86-NEXT: retl +; X86-WITH-BMI-LABEL: reassoc_demorgan_i32: +; X86-WITH-BMI: # %bb.0: +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-WITH-BMI-NEXT: notl %ecx +; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax +; X86-WITH-BMI-NEXT: retl ; -; X64-LABEL: reassoc_demorgan_i32: -; X64: # %bb.0: -; X64-NEXT: notl %edi -; X64-NEXT: andnl %edi, %esi, %eax -; X64-NEXT: retq +; X64-WITH-BMI-LABEL: reassoc_demorgan_i32: +; X64-WITH-BMI: # %bb.0: +; X64-WITH-BMI-NEXT: notl %edi +; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax +; X64-WITH-BMI-NEXT: retq +; +; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i32: +; X86-WITHOUT-BMI: # %bb.0: +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: notl %eax +; X86-WITHOUT-BMI-NEXT: retl +; +; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i32: +; X64-WITHOUT-BMI: # %bb.0: +; X64-WITHOUT-BMI-NEXT: movl %edi, %eax +; X64-WITHOUT-BMI-NEXT: orl %esi, %eax +; X64-WITHOUT-BMI-NEXT: notl %eax +; X64-WITHOUT-BMI-NEXT: retq %temp = or i32 %b, %a %res = xor i32 %temp, -1 ret i32 %res } define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind { -; X86-LABEL: reassoc_demorgan_three_arguments_i32: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: notl %eax -; X86-NEXT: andnl %eax, %ecx, %eax -; X86-NEXT: retl +; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32: +; X86-WITH-BMI: # %bb.0: +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITH-BMI-NEXT: notl %edx +; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %ecx +; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax +; X86-WITH-BMI-NEXT: retl +; +; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32: +; X64-WITH-BMI: # %bb.0: +; X64-WITH-BMI-NEXT: notl %edi +; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax +; X64-WITH-BMI-NEXT: andnl %eax, %edx, %eax +; X64-WITH-BMI-NEXT: retq +; +; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32: +; X86-WITHOUT-BMI: # %bb.0: +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: notl %eax +; X86-WITHOUT-BMI-NEXT: retl ; -; X64-LABEL: reassoc_demorgan_three_arguments_i32: -; X64: # %bb.0: -; X64-NEXT: orl %esi, %edi -; X64-NEXT: notl %edx -; X64-NEXT: andnl %edx, %edi, %eax -; X64-NEXT: retq +; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32: +; X64-WITHOUT-BMI: # %bb.0: +; X64-WITHOUT-BMI-NEXT: movl %edi, %eax +; X64-WITHOUT-BMI-NEXT: orl %esi, %eax +; X64-WITHOUT-BMI-NEXT: orl %edx, %eax +; X64-WITHOUT-BMI-NEXT: notl %eax +; X64-WITHOUT-BMI-NEXT: retq 
%and.demorgan = or i32 %b, %a %and3.demorgan = or i32 %and.demorgan, %c %and3 = xor i32 %and3.demorgan, -1 @@ -44,53 +77,96 @@ define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwin } define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind { -; X86-LABEL: reassoc_demorgan_i64: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: notl %edx -; X86-NEXT: andnl %edx, %eax, %eax -; X86-NEXT: notl %esi -; X86-NEXT: andnl %esi, %ecx, %edx -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-WITH-BMI-LABEL: reassoc_demorgan_i64: +; X86-WITH-BMI: # %bb.0: +; X86-WITH-BMI-NEXT: pushl %esi +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-WITH-BMI-NEXT: notl %edx +; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax +; X86-WITH-BMI-NEXT: notl %esi +; X86-WITH-BMI-NEXT: andnl %esi, %ecx, %edx +; X86-WITH-BMI-NEXT: popl %esi +; X86-WITH-BMI-NEXT: retl ; -; X64-LABEL: reassoc_demorgan_i64: -; X64: # %bb.0: -; X64-NEXT: notq %rdi -; X64-NEXT: andnq %rdi, %rsi, %rax -; X64-NEXT: retq +; X64-WITH-BMI-LABEL: reassoc_demorgan_i64: +; X64-WITH-BMI: # %bb.0: +; X64-WITH-BMI-NEXT: notq %rdi +; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-WITH-BMI-NEXT: retq +; +; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i64: +; X86-WITHOUT-BMI: # %bb.0: +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: notl %eax +; X86-WITHOUT-BMI-NEXT: notl %edx +; X86-WITHOUT-BMI-NEXT: retl +; +; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i64: +; X64-WITHOUT-BMI: # %bb.0: +; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax +; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax +; X64-WITHOUT-BMI-NEXT: notq %rax +; X64-WITHOUT-BMI-NEXT: retq %temp = or i64 %b, %a %res = xor i64 %temp, -1 ret i64 %res } define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind { -; X86-LABEL: reassoc_demorgan_three_arguments_i64: -; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: orl {{[0-9]+}}(%esp), %esi -; X86-NEXT: orl {{[0-9]+}}(%esp), %edx -; X86-NEXT: notl %eax -; X86-NEXT: andnl %eax, %edx, %eax -; X86-NEXT: notl %ecx -; X86-NEXT: andnl %ecx, %esi, %edx -; X86-NEXT: popl %esi -; X86-NEXT: retl +; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64: +; X86-WITH-BMI: # %bb.0: +; X86-WITH-BMI-NEXT: pushl %ebx +; X86-WITH-BMI-NEXT: pushl %edi +; X86-WITH-BMI-NEXT: pushl %esi +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-WITH-BMI-NEXT: notl %edi +; X86-WITH-BMI-NEXT: andnl %edi, %edx, %edx +; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax +; X86-WITH-BMI-NEXT: notl %ebx +; X86-WITH-BMI-NEXT: andnl %ebx, %esi, %edx +; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %edx +; X86-WITH-BMI-NEXT: popl %esi +; X86-WITH-BMI-NEXT: popl %edi +; 
X86-WITH-BMI-NEXT: popl %ebx +; X86-WITH-BMI-NEXT: retl +; +; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64: +; X64-WITH-BMI: # %bb.0: +; X64-WITH-BMI-NEXT: notq %rdi +; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-WITH-BMI-NEXT: andnq %rax, %rdx, %rax +; X64-WITH-BMI-NEXT: retq +; +; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64: +; X86-WITHOUT-BMI: # %bb.0: +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: notl %eax +; X86-WITHOUT-BMI-NEXT: notl %edx +; X86-WITHOUT-BMI-NEXT: retl ; -; X64-LABEL: reassoc_demorgan_three_arguments_i64: -; X64: # %bb.0: -; X64-NEXT: orq %rsi, %rdi -; X64-NEXT: notq %rdx -; X64-NEXT: andnq %rdx, %rdi, %rax -; X64-NEXT: retq +; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64: +; X64-WITHOUT-BMI: # %bb.0: +; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax +; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax +; X64-WITHOUT-BMI-NEXT: orq %rdx, %rax +; X64-WITHOUT-BMI-NEXT: notq %rax +; X64-WITHOUT-BMI-NEXT: retq %and.demorgan = or i64 %b, %a %and3.demorgan = or i64 %and.demorgan, %c %and3 = xor i64 %and3.demorgan, -1 From cef0067d84a41aad176352f02d9585dd71355a71 Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Thu, 16 Oct 2025 20:16:46 +0000 Subject: [PATCH 06/20] [DAG]: Updated tests --- llvm/test/CodeGen/X86/andnot-patterns.ll | 184 ++++++++++++++--------- 1 file changed, 116 insertions(+), 68 deletions(-) diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll index fc573fbd4fc99..0701d7046fc35 100644 --- a/llvm/test/CodeGen/X86/andnot-patterns.ll +++ b/llvm/test/CodeGen/X86/andnot-patterns.ll @@ -761,6 +761,7 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { ; ; X86-BMI-LABEL: andnot_bitreverse_i64: ; X86-BMI: # %bb.0: +; X86-BMI-NEXT: pushl %esi ; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI-NEXT: bswapl %eax @@ -774,13 +775,16 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { ; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333 ; X86-BMI-NEXT: shrl $2, %eax ; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X86-BMI-NEXT: leal (%eax,%edx,4), %eax -; X86-BMI-NEXT: movl %eax, %edx -; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-BMI-NEXT: leal (%eax,%edx,4), %esi +; X86-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; X86-BMI-NEXT: addl %esi, %esi +; X86-BMI-NEXT: shll $2, %edx +; X86-BMI-NEXT: notl %edx +; X86-BMI-NEXT: andnl %edx, %eax, %eax ; X86-BMI-NEXT: shrl %eax -; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; X86-BMI-NEXT: leal (%eax,%edx,2), %eax -; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: orl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-BMI-NEXT: andnl %eax, %esi, %eax +; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-BMI-NEXT: bswapl %ecx ; X86-BMI-NEXT: movl %ecx, %edx ; X86-BMI-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F @@ -792,13 +796,17 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { ; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333 ; X86-BMI-NEXT: shrl $2, %ecx ; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; X86-BMI-NEXT: leal (%ecx,%edx,4), %ecx -; X86-BMI-NEXT: movl %ecx, %edx -; X86-BMI-NEXT: andl 
$1431655765, %edx # imm = 0x55555555 +; X86-BMI-NEXT: leal (%ecx,%edx,4), %esi +; X86-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 +; X86-BMI-NEXT: addl %esi, %esi +; X86-BMI-NEXT: shll $2, %edx +; X86-BMI-NEXT: notl %edx +; X86-BMI-NEXT: andnl %edx, %ecx, %ecx ; X86-BMI-NEXT: shrl %ecx -; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; X86-BMI-NEXT: leal (%ecx,%edx,2), %ecx -; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx +; X86-BMI-NEXT: orl $-1431655766, %ecx # imm = 0xAAAAAAAA +; X86-BMI-NEXT: andnl %ecx, %esi, %edx +; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-BMI-NEXT: popl %esi ; X86-BMI-NEXT: retl ; ; X64-NOBMI-LABEL: andnot_bitreverse_i64: @@ -837,19 +845,23 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { ; X64-BMI-NEXT: andq %rcx, %rsi ; X64-BMI-NEXT: shlq $4, %rsi ; X64-BMI-NEXT: orq %rax, %rsi -; X64-BMI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 -; X64-BMI-NEXT: movq %rsi, %rcx -; X64-BMI-NEXT: andq %rax, %rcx -; X64-BMI-NEXT: shrq $2, %rsi -; X64-BMI-NEXT: andq %rax, %rsi -; X64-BMI-NEXT: leaq (%rsi,%rcx,4), %rax -; X64-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 -; X64-BMI-NEXT: movq %rax, %rdx -; X64-BMI-NEXT: andq %rcx, %rdx -; X64-BMI-NEXT: shrq %rax +; X64-BMI-NEXT: movq %rsi, %rax +; X64-BMI-NEXT: shrq $2, %rax +; X64-BMI-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333 ; X64-BMI-NEXT: andq %rcx, %rax -; X64-BMI-NEXT: leaq (%rax,%rdx,2), %rax -; X64-BMI-NEXT: andnq %rdi, %rax, %rax +; X64-BMI-NEXT: andq %rcx, %rsi +; X64-BMI-NEXT: leaq (,%rsi,4), %rcx +; X64-BMI-NEXT: notq %rcx +; X64-BMI-NEXT: andnq %rcx, %rax, %rcx +; X64-BMI-NEXT: shrq %rcx +; X64-BMI-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA +; X64-BMI-NEXT: orq %rcx, %rdx +; X64-BMI-NEXT: leaq (%rax,%rsi,4), %rax +; X64-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-BMI-NEXT: andq %rax, %rcx +; X64-BMI-NEXT: addq %rcx, %rcx +; X64-BMI-NEXT: andnq %rdx, %rcx, %rax +; X64-BMI-NEXT: andq %rdi, %rax ; X64-BMI-NEXT: retq %not = xor i64 %a1, -1 %bitrev = tail call i64 @llvm.bitreverse.i64(i64 %not) @@ -896,13 +908,16 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind { ; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333 ; X86-BMI-NEXT: shrl $2, %eax ; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X86-BMI-NEXT: leal (%eax,%ecx,4), %eax -; X86-BMI-NEXT: movl %eax, %ecx -; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-BMI-NEXT: leal (%eax,%ecx,4), %edx +; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555 +; X86-BMI-NEXT: addl %edx, %edx +; X86-BMI-NEXT: shll $2, %ecx +; X86-BMI-NEXT: notl %ecx +; X86-BMI-NEXT: andnl %ecx, %eax, %eax ; X86-BMI-NEXT: shrl %eax -; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; X86-BMI-NEXT: leal (%eax,%ecx,2), %eax -; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax +; X86-BMI-NEXT: orl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-BMI-NEXT: andnl %eax, %edx, %eax +; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-BMI-NEXT: retl ; ; X64-NOBMI-LABEL: andnot_bitreverse_i32: @@ -940,16 +955,19 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind { ; X64-BMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F ; X64-BMI-NEXT: orl %eax, %esi ; X64-BMI-NEXT: movl %esi, %eax +; X64-BMI-NEXT: shrl $2, %eax ; X64-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X64-BMI-NEXT: shrl $2, %esi ; X64-BMI-NEXT: andl $858993459, %esi # imm = 0x33333333 -; 
X64-BMI-NEXT: leal (%rsi,%rax,4), %eax -; X64-BMI-NEXT: movl %eax, %ecx -; X64-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 -; X64-BMI-NEXT: shrl %eax +; X64-BMI-NEXT: leal (,%rsi,4), %ecx +; X64-BMI-NEXT: notl %ecx +; X64-BMI-NEXT: andnl %ecx, %eax, %ecx +; X64-BMI-NEXT: shrl %ecx +; X64-BMI-NEXT: orl $-1431655766, %ecx # imm = 0xAAAAAAAA +; X64-BMI-NEXT: leal (%rax,%rsi,4), %eax ; X64-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; X64-BMI-NEXT: leal (%rax,%rcx,2), %eax -; X64-BMI-NEXT: andnl %edi, %eax, %eax +; X64-BMI-NEXT: addl %eax, %eax +; X64-BMI-NEXT: andnl %ecx, %eax, %eax +; X64-BMI-NEXT: andl %edi, %eax ; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %bitrev = tail call i32 @llvm.bitreverse.i32(i32 %not) @@ -958,30 +976,57 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind { } define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind { -; X86-LABEL: andnot_bitreverse_i16: -; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: rolw $8, %ax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $3855, %ecx # imm = 0xF0F -; X86-NEXT: shll $4, %ecx -; X86-NEXT: shrl $4, %eax -; X86-NEXT: andl $3855, %eax # imm = 0xF0F -; X86-NEXT: orl %ecx, %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $13107, %ecx # imm = 0x3333 -; X86-NEXT: shrl $2, %eax -; X86-NEXT: andl $13107, %eax # imm = 0x3333 -; X86-NEXT: leal (%eax,%ecx,4), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: andl $21845, %ecx # imm = 0x5555 -; X86-NEXT: shrl %eax -; X86-NEXT: andl $21845, %eax # imm = 0x5555 -; X86-NEXT: leal (%eax,%ecx,2), %eax -; X86-NEXT: notl %eax -; X86-NEXT: andw {{[0-9]+}}(%esp), %ax -; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: retl +; X86-NOBMI-LABEL: andnot_bitreverse_i16: +; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NOBMI-NEXT: rolw $8, %ax +; X86-NOBMI-NEXT: movl %eax, %ecx +; X86-NOBMI-NEXT: andl $3855, %ecx # imm = 0xF0F +; X86-NOBMI-NEXT: shll $4, %ecx +; X86-NOBMI-NEXT: shrl $4, %eax +; X86-NOBMI-NEXT: andl $3855, %eax # imm = 0xF0F +; X86-NOBMI-NEXT: orl %ecx, %eax +; X86-NOBMI-NEXT: movl %eax, %ecx +; X86-NOBMI-NEXT: andl $13107, %ecx # imm = 0x3333 +; X86-NOBMI-NEXT: shrl $2, %eax +; X86-NOBMI-NEXT: andl $13107, %eax # imm = 0x3333 +; X86-NOBMI-NEXT: leal (%eax,%ecx,4), %eax +; X86-NOBMI-NEXT: movl %eax, %ecx +; X86-NOBMI-NEXT: andl $21845, %ecx # imm = 0x5555 +; X86-NOBMI-NEXT: shrl %eax +; X86-NOBMI-NEXT: andl $21845, %eax # imm = 0x5555 +; X86-NOBMI-NEXT: leal (%eax,%ecx,2), %eax +; X86-NOBMI-NEXT: notl %eax +; X86-NOBMI-NEXT: andw {{[0-9]+}}(%esp), %ax +; X86-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax +; X86-NOBMI-NEXT: retl +; +; X86-BMI-LABEL: andnot_bitreverse_i16: +; X86-BMI: # %bb.0: +; X86-BMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: rolw $8, %ax +; X86-BMI-NEXT: movl %eax, %ecx +; X86-BMI-NEXT: andl $3855, %ecx # imm = 0xF0F +; X86-BMI-NEXT: shll $4, %ecx +; X86-BMI-NEXT: shrl $4, %eax +; X86-BMI-NEXT: andl $3855, %eax # imm = 0xF0F +; X86-BMI-NEXT: orl %ecx, %eax +; X86-BMI-NEXT: movl %eax, %ecx +; X86-BMI-NEXT: andl $13107, %ecx # imm = 0x3333 +; X86-BMI-NEXT: shrl $2, %eax +; X86-BMI-NEXT: andl $13107, %eax # imm = 0x3333 +; X86-BMI-NEXT: leal (%eax,%ecx,4), %edx +; X86-BMI-NEXT: andl $21845, %edx # imm = 0x5555 +; X86-BMI-NEXT: addl %edx, %edx +; X86-BMI-NEXT: shll $2, %ecx +; X86-BMI-NEXT: notl %ecx +; X86-BMI-NEXT: andnl %ecx, %eax, %eax +; X86-BMI-NEXT: shrl %eax +; X86-BMI-NEXT: orl $43690, %eax # imm = 0xAAAA +; X86-BMI-NEXT: andnl %eax, %edx, %eax +; 
X86-BMI-NEXT: andw {{[0-9]+}}(%esp), %ax +; X86-BMI-NEXT: # kill: def $ax killed $ax killed $eax +; X86-BMI-NEXT: retl ; ; X64-NOBMI-LABEL: andnot_bitreverse_i16: ; X64-NOBMI: # %bb.0: @@ -1019,16 +1064,19 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind { ; X64-BMI-NEXT: andl $3855, %esi # imm = 0xF0F ; X64-BMI-NEXT: orl %eax, %esi ; X64-BMI-NEXT: movl %esi, %eax +; X64-BMI-NEXT: shrl $2, %eax ; X64-BMI-NEXT: andl $13107, %eax # imm = 0x3333 -; X64-BMI-NEXT: shrl $2, %esi ; X64-BMI-NEXT: andl $13107, %esi # imm = 0x3333 -; X64-BMI-NEXT: leal (%rsi,%rax,4), %eax -; X64-BMI-NEXT: movl %eax, %ecx -; X64-BMI-NEXT: andl $21845, %ecx # imm = 0x5555 -; X64-BMI-NEXT: shrl %eax +; X64-BMI-NEXT: leal (,%rsi,4), %ecx +; X64-BMI-NEXT: notl %ecx +; X64-BMI-NEXT: andnl %ecx, %eax, %ecx +; X64-BMI-NEXT: shrl %ecx +; X64-BMI-NEXT: orl $-21846, %ecx # imm = 0xAAAA +; X64-BMI-NEXT: leal (%rax,%rsi,4), %eax ; X64-BMI-NEXT: andl $21845, %eax # imm = 0x5555 -; X64-BMI-NEXT: leal (%rax,%rcx,2), %eax -; X64-BMI-NEXT: andnl %edi, %eax, %eax +; X64-BMI-NEXT: addl %eax, %eax +; X64-BMI-NEXT: andnl %ecx, %eax, %eax +; X64-BMI-NEXT: andl %edi, %eax ; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax ; X64-BMI-NEXT: retq %not = xor i16 %a1, -1 From e31e5ab5bc605bd83d9090b207fabf7094294679 Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Fri, 17 Oct 2025 05:08:17 +0000 Subject: [PATCH 07/20] [DAG]: Updated tests --- .../AArch64/neon-compare-instructions.ll | 264 ++++++++++++------ llvm/test/CodeGen/PowerPC/vsx.ll | 62 ++-- 2 files changed, 219 insertions(+), 107 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll index 11b3b62ec1c8d..60c6d84679451 100644 --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -2217,13 +2217,21 @@ define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) { ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands. define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) { -; CHECK-LABEL: fcmuno2xfloat: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s -; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s -; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmuno2xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s +; CHECK-SD-NEXT: fcmge v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mvn v1.8b, v2.8b +; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmuno2xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-GI-NEXT: mvn v0.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp3 = fcmp uno <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -2231,13 +2239,21 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) { ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands. 
define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) { -; CHECK-LABEL: fcmuno4xfloat: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s -; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmuno4xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s +; CHECK-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: mvn v1.16b, v2.16b +; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmuno4xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp3 = fcmp uno <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -2245,13 +2261,21 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) { ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands. define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) { -; CHECK-LABEL: fcmuno2xdouble: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d -; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmuno2xdouble: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d +; CHECK-SD-NEXT: fcmge v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: mvn v1.16b, v2.16b +; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmuno2xdouble: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp3 = fcmp uno <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -2259,13 +2283,21 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) { ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) { -; CHECK-LABEL: fcmueq2xfloat: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmgt v2.2s, v0.2s, v1.2s -; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s -; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmueq2xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s +; CHECK-SD-NEXT: fcmgt v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mvn v1.8b, v2.8b +; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmueq2xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmgt v2.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-GI-NEXT: mvn v0.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp3 = fcmp ueq <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -2273,13 +2305,21 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) { ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) { -; CHECK-LABEL: fcmueq4xfloat: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmgt v2.4s, v0.4s, v1.4s -; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmueq4xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmgt v2.4s, 
v1.4s, v0.4s +; CHECK-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: mvn v1.16b, v2.16b +; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmueq4xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmgt v2.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp3 = fcmp ueq <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -2287,13 +2327,21 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) { ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) { -; CHECK-LABEL: fcmueq2xdouble: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmgt v2.2d, v0.2d, v1.2d -; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmueq2xdouble: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d +; CHECK-SD-NEXT: fcmgt v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: mvn v1.16b, v2.16b +; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmueq2xdouble: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmgt v2.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp3 = fcmp ueq <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -2792,13 +2840,21 @@ define <2 x i64> @fcmordz2xdouble(<2 x double> %A) { ; UEQ with zero = !ONE = !(OLT |OGT) define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) { -; CHECK-LABEL: fcmueqz2xfloat: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmgt v1.2s, v0.2s, #0.0 -; CHECK-NEXT: fcmlt v0.2s, v0.2s, #0.0 -; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmueqz2xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmlt v1.2s, v0.2s, #0.0 +; CHECK-SD-NEXT: fcmgt v0.2s, v0.2s, #0.0 +; CHECK-SD-NEXT: mvn v1.8b, v1.8b +; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmueqz2xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmgt v1.2s, v0.2s, #0.0 +; CHECK-GI-NEXT: fcmlt v0.2s, v0.2s, #0.0 +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: mvn v0.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -2806,13 +2862,21 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) { ; UEQ with zero = !ONE = !(OLT |OGT) define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) { -; CHECK-LABEL: fcmueqz4xfloat: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmgt v1.4s, v0.4s, #0.0 -; CHECK-NEXT: fcmlt v0.4s, v0.4s, #0.0 -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmueqz4xfloat: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmlt v1.4s, v0.4s, #0.0 +; CHECK-SD-NEXT: fcmgt v0.4s, v0.4s, #0.0 +; CHECK-SD-NEXT: mvn v1.16b, v1.16b +; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmueqz4xfloat: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmgt v1.4s, v0.4s, #0.0 +; CHECK-GI-NEXT: fcmlt v0.4s, v0.4s, #0.0 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x 
i32> %tmp4 @@ -2820,13 +2884,21 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) { ; UEQ with zero = !ONE = !(OLT |OGT) define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) { -; CHECK-LABEL: fcmueqz2xdouble: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmgt v1.2d, v0.2d, #0.0 -; CHECK-NEXT: fcmlt v0.2d, v0.2d, #0.0 -; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmueqz2xdouble: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmlt v1.2d, v0.2d, #0.0 +; CHECK-SD-NEXT: fcmgt v0.2d, v0.2d, #0.0 +; CHECK-SD-NEXT: mvn v1.16b, v1.16b +; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmueqz2xdouble: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmgt v1.2d, v0.2d, #0.0 +; CHECK-GI-NEXT: fcmlt v0.2d, v0.2d, #0.0 +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -3286,39 +3358,63 @@ define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) { define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) { -; CHECK-LABEL: fcmuno2xfloat_fast: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s -; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s -; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmuno2xfloat_fast: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s +; CHECK-SD-NEXT: fcmge v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mvn v1.8b, v2.8b +; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmuno2xfloat_fast: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s +; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s +; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-GI-NEXT: mvn v0.8b, v0.8b +; CHECK-GI-NEXT: ret %tmp3 = fcmp fast uno <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 } define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) { -; CHECK-LABEL: fcmuno4xfloat_fast: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s -; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmuno4xfloat_fast: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s +; CHECK-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s +; CHECK-SD-NEXT: mvn v1.16b, v2.16b +; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmuno4xfloat_fast: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s +; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp3 = fcmp fast uno <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) { -; CHECK-LABEL: fcmuno2xdouble_fast: -; CHECK: // %bb.0: -; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d -; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: fcmuno2xdouble_fast: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d +; CHECK-SD-NEXT: fcmge v0.2d, v0.2d, v1.2d +; CHECK-SD-NEXT: mvn v1.16b, v2.16b +; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: fcmuno2xdouble_fast: +; 
CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d +; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d +; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-GI-NEXT: ret %tmp3 = fcmp fast uno <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index 14b3d69f8c273..3cde26271d50e 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -378,23 +378,27 @@ entry: define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlnor v2, v2, v3 +; CHECK-NEXT: xxlnor vs0, v2, v2 +; CHECK-NEXT: xxlandc v2, vs0, v3 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test14: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xxlnor v2, v2, v3 +; CHECK-REG-NEXT: xxlnor vs0, v2, v2 +; CHECK-REG-NEXT: xxlandc v2, vs0, v3 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test14: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlnor v2, v2, v3 +; CHECK-FISL-NEXT: xxlnor vs0, v2, v2 +; CHECK-FISL-NEXT: xxlandc v2, vs0, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test14: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxlnor v2, v2, v3 +; CHECK-LE-NEXT: xxlnor vs0, v2, v2 +; CHECK-LE-NEXT: xxlandc v2, vs0, v3 ; CHECK-LE-NEXT: blr entry: %v = or <4 x i32> %a, %b @@ -408,23 +412,27 @@ entry: define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test15: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlnor v2, v2, v3 +; CHECK-NEXT: xxlnor vs0, v2, v2 +; CHECK-NEXT: xxlandc v2, vs0, v3 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test15: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xxlnor v2, v2, v3 +; CHECK-REG-NEXT: xxlnor vs0, v2, v2 +; CHECK-REG-NEXT: xxlandc v2, vs0, v3 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test15: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xxlor v4, v2, v3 -; CHECK-FISL-NEXT: xxlnor v2, v2, v3 +; CHECK-FISL-NEXT: xxlnor vs0, v2, v2 +; CHECK-FISL-NEXT: xxlandc v2, vs0, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test15: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxlnor v2, v2, v3 +; CHECK-LE-NEXT: xxlnor vs0, v2, v2 +; CHECK-LE-NEXT: xxlandc v2, vs0, v3 ; CHECK-LE-NEXT: blr entry: %v = or <8 x i16> %a, %b @@ -438,23 +446,27 @@ entry: define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlnor v2, v2, v3 +; CHECK-NEXT: xxlnor vs0, v2, v2 +; CHECK-NEXT: xxlandc v2, vs0, v3 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test16: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xxlnor v2, v2, v3 +; CHECK-REG-NEXT: xxlnor vs0, v2, v2 +; CHECK-REG-NEXT: xxlandc v2, vs0, v3 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test16: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xxlor v4, v2, v3 -; CHECK-FISL-NEXT: xxlnor v2, v2, v3 +; CHECK-FISL-NEXT: xxlnor vs0, v2, v2 +; CHECK-FISL-NEXT: xxlandc v2, vs0, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test16: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxlnor v2, v2, v3 +; CHECK-LE-NEXT: xxlnor vs0, v2, v2 +; CHECK-LE-NEXT: xxlandc v2, vs0, v3 ; CHECK-LE-NEXT: blr entry: %v = or <16 x i8> %a, %b @@ -624,34 +636,38 @@ entry: define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) { ; CHECK-LABEL: test22: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvcmpgtsp vs0, v5, v4 ; CHECK-NEXT: xvcmpgtsp vs1, v4, v5 -; CHECK-NEXT: xxlor vs0, vs1, vs0 -; CHECK-NEXT: xxsel v2, 
v2, v3, vs0 +; CHECK-NEXT: xvcmpgtsp vs0, v5, v4 +; CHECK-NEXT: xxlnor vs1, vs1, vs1 +; CHECK-NEXT: xxlandc vs0, vs1, vs0 +; CHECK-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test22: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4 ; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5 -; CHECK-REG-NEXT: xxlor vs0, vs1, vs0 -; CHECK-REG-NEXT: xxsel v2, v2, v3, vs0 +; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4 +; CHECK-REG-NEXT: xxlnor vs1, vs1, vs1 +; CHECK-REG-NEXT: xxlandc vs0, vs1, vs0 +; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test22: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4 ; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5 -; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1 -; CHECK-FISL-NEXT: xxsel v2, v2, v3, vs0 +; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs0 +; CHECK-FISL-NEXT: xxlandc vs0, vs0, vs1 +; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test22: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4 ; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5 -; CHECK-LE-NEXT: xxlor vs0, vs1, vs0 -; CHECK-LE-NEXT: xxsel v2, v2, v3, vs0 +; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4 +; CHECK-LE-NEXT: xxlnor vs1, vs1, vs1 +; CHECK-LE-NEXT: xxlandc vs0, vs1, vs0 +; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-LE-NEXT: blr entry: %m = fcmp ueq <4 x float> %c, %d From cf9da8380eacff36dd4173c2afd7e39e36c1e50a Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Sat, 18 Oct 2025 06:22:07 +0000 Subject: [PATCH 08/20] Revert "[DAG]: Updated tests" This reverts commit e31e5ab5bc605bd83d9090b207fabf7094294679. --- .../AArch64/neon-compare-instructions.ll | 264 ++++++------------ llvm/test/CodeGen/PowerPC/vsx.ll | 62 ++-- 2 files changed, 107 insertions(+), 219 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll index 60c6d84679451..11b3b62ec1c8d 100644 --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -2217,21 +2217,13 @@ define <2 x i64> @fcmord2xdouble(<2 x double> %A, <2 x double> %B) { ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands. define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) { -; CHECK-SD-LABEL: fcmuno2xfloat: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s -; CHECK-SD-NEXT: fcmge v0.2s, v0.2s, v1.2s -; CHECK-SD-NEXT: mvn v1.8b, v2.8b -; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmuno2xfloat: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s -; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-GI-NEXT: mvn v0.8b, v0.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmuno2xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s +; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s +; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: ret %tmp3 = fcmp uno <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -2239,21 +2231,13 @@ define <2 x i32> @fcmuno2xfloat(<2 x float> %A, <2 x float> %B) { ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands. 
define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) { -; CHECK-SD-LABEL: fcmuno4xfloat: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s -; CHECK-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: mvn v1.16b, v2.16b -; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmuno4xfloat: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmuno4xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s +; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret %tmp3 = fcmp uno <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -2261,21 +2245,13 @@ define <4 x i32> @fcmuno4xfloat(<4 x float> %A, <4 x float> %B) { ; UNO = !(OGE | OLT), OLT implemented as OGT, so check reversed operands. define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) { -; CHECK-SD-LABEL: fcmuno2xdouble: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d -; CHECK-SD-NEXT: fcmge v0.2d, v0.2d, v1.2d -; CHECK-SD-NEXT: mvn v1.16b, v2.16b -; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmuno2xdouble: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmuno2xdouble: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d +; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret %tmp3 = fcmp uno <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -2283,21 +2259,13 @@ define <2 x i64> @fcmuno2xdouble(<2 x double> %A, <2 x double> %B) { ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) { -; CHECK-SD-LABEL: fcmueq2xfloat: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s -; CHECK-SD-NEXT: fcmgt v0.2s, v0.2s, v1.2s -; CHECK-SD-NEXT: mvn v1.8b, v2.8b -; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmueq2xfloat: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmgt v2.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s -; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-GI-NEXT: mvn v0.8b, v0.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmueq2xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v2.2s, v0.2s, v1.2s +; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s +; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: ret %tmp3 = fcmp ueq <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -2305,21 +2273,13 @@ define <2 x i32> @fcmueq2xfloat(<2 x float> %A, <2 x float> %B) { ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) { -; CHECK-SD-LABEL: fcmueq4xfloat: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s -; CHECK-SD-NEXT: fcmgt v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: mvn v1.16b, v2.16b -; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmueq4xfloat: -; CHECK-GI: // %bb.0: -; 
CHECK-GI-NEXT: fcmgt v2.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmueq4xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v2.4s, v0.4s, v1.4s +; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret %tmp3 = fcmp ueq <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 @@ -2327,21 +2287,13 @@ define <4 x i32> @fcmueq4xfloat(<4 x float> %A, <4 x float> %B) { ; UEQ = !ONE = !(OGT | OLT), OLT implemented as OGT so check reversed operands define <2 x i64> @fcmueq2xdouble(<2 x double> %A, <2 x double> %B) { -; CHECK-SD-LABEL: fcmueq2xdouble: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d -; CHECK-SD-NEXT: fcmgt v0.2d, v0.2d, v1.2d -; CHECK-SD-NEXT: mvn v1.16b, v2.16b -; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmueq2xdouble: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmgt v2.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmueq2xdouble: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v2.2d, v0.2d, v1.2d +; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret %tmp3 = fcmp ueq <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -2840,21 +2792,13 @@ define <2 x i64> @fcmordz2xdouble(<2 x double> %A) { ; UEQ with zero = !ONE = !(OLT |OGT) define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) { -; CHECK-SD-LABEL: fcmueqz2xfloat: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmlt v1.2s, v0.2s, #0.0 -; CHECK-SD-NEXT: fcmgt v0.2s, v0.2s, #0.0 -; CHECK-SD-NEXT: mvn v1.8b, v1.8b -; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmueqz2xfloat: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmgt v1.2s, v0.2s, #0.0 -; CHECK-GI-NEXT: fcmlt v0.2s, v0.2s, #0.0 -; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-GI-NEXT: mvn v0.8b, v0.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmueqz2xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v1.2s, v0.2s, #0.0 +; CHECK-NEXT: fcmlt v0.2s, v0.2s, #0.0 +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: ret %tmp3 = fcmp ueq <2 x float> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 @@ -2862,21 +2806,13 @@ define <2 x i32> @fcmueqz2xfloat(<2 x float> %A) { ; UEQ with zero = !ONE = !(OLT |OGT) define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) { -; CHECK-SD-LABEL: fcmueqz4xfloat: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmlt v1.4s, v0.4s, #0.0 -; CHECK-SD-NEXT: fcmgt v0.4s, v0.4s, #0.0 -; CHECK-SD-NEXT: mvn v1.16b, v1.16b -; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmueqz4xfloat: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmgt v1.4s, v0.4s, #0.0 -; CHECK-GI-NEXT: fcmlt v0.4s, v0.4s, #0.0 -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmueqz4xfloat: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v1.4s, v0.4s, #0.0 +; CHECK-NEXT: fcmlt v0.4s, v0.4s, #0.0 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret %tmp3 = fcmp ueq <4 x float> %A, zeroinitializer %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 
x i32> %tmp4 @@ -2884,21 +2820,13 @@ define <4 x i32> @fcmueqz4xfloat(<4 x float> %A) { ; UEQ with zero = !ONE = !(OLT |OGT) define <2 x i64> @fcmueqz2xdouble(<2 x double> %A) { -; CHECK-SD-LABEL: fcmueqz2xdouble: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmlt v1.2d, v0.2d, #0.0 -; CHECK-SD-NEXT: fcmgt v0.2d, v0.2d, #0.0 -; CHECK-SD-NEXT: mvn v1.16b, v1.16b -; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmueqz2xdouble: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmgt v1.2d, v0.2d, #0.0 -; CHECK-GI-NEXT: fcmlt v0.2d, v0.2d, #0.0 -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmueqz2xdouble: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmgt v1.2d, v0.2d, #0.0 +; CHECK-NEXT: fcmlt v0.2d, v0.2d, #0.0 +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret %tmp3 = fcmp ueq <2 x double> %A, zeroinitializer %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 @@ -3358,63 +3286,39 @@ define <2 x i64> @fcmord2xdouble_fast(<2 x double> %A, <2 x double> %B) { define <2 x i32> @fcmuno2xfloat_fast(<2 x float> %A, <2 x float> %B) { -; CHECK-SD-LABEL: fcmuno2xfloat_fast: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmgt v2.2s, v1.2s, v0.2s -; CHECK-SD-NEXT: fcmge v0.2s, v0.2s, v1.2s -; CHECK-SD-NEXT: mvn v1.8b, v2.8b -; CHECK-SD-NEXT: bic v0.8b, v1.8b, v0.8b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmuno2xfloat_fast: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmge v2.2s, v0.2s, v1.2s -; CHECK-GI-NEXT: fcmgt v0.2s, v1.2s, v0.2s -; CHECK-GI-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-GI-NEXT: mvn v0.8b, v0.8b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmuno2xfloat_fast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge v2.2s, v0.2s, v1.2s +; CHECK-NEXT: fcmgt v0.2s, v1.2s, v0.2s +; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: ret %tmp3 = fcmp fast uno <2 x float> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i32> ret <2 x i32> %tmp4 } define <4 x i32> @fcmuno4xfloat_fast(<4 x float> %A, <4 x float> %B) { -; CHECK-SD-LABEL: fcmuno4xfloat_fast: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmgt v2.4s, v1.4s, v0.4s -; CHECK-SD-NEXT: fcmge v0.4s, v0.4s, v1.4s -; CHECK-SD-NEXT: mvn v1.16b, v2.16b -; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmuno4xfloat_fast: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmge v2.4s, v0.4s, v1.4s -; CHECK-GI-NEXT: fcmgt v0.4s, v1.4s, v0.4s -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: fcmuno4xfloat_fast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge v2.4s, v0.4s, v1.4s +; CHECK-NEXT: fcmgt v0.4s, v1.4s, v0.4s +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret %tmp3 = fcmp fast uno <4 x float> %A, %B %tmp4 = sext <4 x i1> %tmp3 to <4 x i32> ret <4 x i32> %tmp4 } define <2 x i64> @fcmuno2xdouble_fast(<2 x double> %A, <2 x double> %B) { -; CHECK-SD-LABEL: fcmuno2xdouble_fast: -; CHECK-SD: // %bb.0: -; CHECK-SD-NEXT: fcmgt v2.2d, v1.2d, v0.2d -; CHECK-SD-NEXT: fcmge v0.2d, v0.2d, v1.2d -; CHECK-SD-NEXT: mvn v1.16b, v2.16b -; CHECK-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-SD-NEXT: ret -; -; CHECK-GI-LABEL: fcmuno2xdouble_fast: -; CHECK-GI: // %bb.0: -; CHECK-GI-NEXT: fcmge v2.2d, v0.2d, v1.2d -; CHECK-GI-NEXT: fcmgt v0.2d, v1.2d, v0.2d -; CHECK-GI-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-GI-NEXT: ret +; CHECK-LABEL: 
fcmuno2xdouble_fast: +; CHECK: // %bb.0: +; CHECK-NEXT: fcmge v2.2d, v0.2d, v1.2d +; CHECK-NEXT: fcmgt v0.2d, v1.2d, v0.2d +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: ret %tmp3 = fcmp fast uno <2 x double> %A, %B %tmp4 = sext <2 x i1> %tmp3 to <2 x i64> ret <2 x i64> %tmp4 diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index 3cde26271d50e..14b3d69f8c273 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -378,27 +378,23 @@ entry: define <4 x i32> @test14(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test14: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlnor vs0, v2, v2 -; CHECK-NEXT: xxlandc v2, vs0, v3 +; CHECK-NEXT: xxlnor v2, v2, v3 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test14: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xxlnor vs0, v2, v2 -; CHECK-REG-NEXT: xxlandc v2, vs0, v3 +; CHECK-REG-NEXT: xxlnor v2, v2, v3 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test14: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlnor vs0, v2, v2 -; CHECK-FISL-NEXT: xxlandc v2, vs0, v3 +; CHECK-FISL-NEXT: xxlnor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test14: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxlnor vs0, v2, v2 -; CHECK-LE-NEXT: xxlandc v2, vs0, v3 +; CHECK-LE-NEXT: xxlnor v2, v2, v3 ; CHECK-LE-NEXT: blr entry: %v = or <4 x i32> %a, %b @@ -412,27 +408,23 @@ entry: define <8 x i16> @test15(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test15: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlnor vs0, v2, v2 -; CHECK-NEXT: xxlandc v2, vs0, v3 +; CHECK-NEXT: xxlnor v2, v2, v3 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test15: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xxlnor vs0, v2, v2 -; CHECK-REG-NEXT: xxlandc v2, vs0, v3 +; CHECK-REG-NEXT: xxlnor v2, v2, v3 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test15: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xxlor v4, v2, v3 -; CHECK-FISL-NEXT: xxlnor vs0, v2, v2 -; CHECK-FISL-NEXT: xxlandc v2, vs0, v3 +; CHECK-FISL-NEXT: xxlnor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test15: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxlnor vs0, v2, v2 -; CHECK-LE-NEXT: xxlandc v2, vs0, v3 +; CHECK-LE-NEXT: xxlnor v2, v2, v3 ; CHECK-LE-NEXT: blr entry: %v = or <8 x i16> %a, %b @@ -446,27 +438,23 @@ entry: define <16 x i8> @test16(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlnor vs0, v2, v2 -; CHECK-NEXT: xxlandc v2, vs0, v3 +; CHECK-NEXT: xxlnor v2, v2, v3 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test16: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xxlnor vs0, v2, v2 -; CHECK-REG-NEXT: xxlandc v2, vs0, v3 +; CHECK-REG-NEXT: xxlnor v2, v2, v3 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test16: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xxlor v4, v2, v3 -; CHECK-FISL-NEXT: xxlnor vs0, v2, v2 -; CHECK-FISL-NEXT: xxlandc v2, vs0, v3 +; CHECK-FISL-NEXT: xxlnor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test16: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xxlnor vs0, v2, v2 -; CHECK-LE-NEXT: xxlandc v2, vs0, v3 +; CHECK-LE-NEXT: xxlnor v2, v2, v3 ; CHECK-LE-NEXT: blr entry: %v = or <16 x i8> %a, %b @@ -636,38 +624,34 @@ entry: define <4 x float> @test22(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d) { ; CHECK-LABEL: test22: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvcmpgtsp vs1, v4, v5 ; CHECK-NEXT: xvcmpgtsp vs0, v5, v4 -; CHECK-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-NEXT: 
xxlandc vs0, vs1, vs0 -; CHECK-NEXT: xxsel v2, v3, v2, vs0 +; CHECK-NEXT: xvcmpgtsp vs1, v4, v5 +; CHECK-NEXT: xxlor vs0, vs1, vs0 +; CHECK-NEXT: xxsel v2, v2, v3, vs0 ; CHECK-NEXT: blr ; ; CHECK-REG-LABEL: test22: ; CHECK-REG: # %bb.0: # %entry -; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5 ; CHECK-REG-NEXT: xvcmpgtsp vs0, v5, v4 -; CHECK-REG-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-REG-NEXT: xxlandc vs0, vs1, vs0 -; CHECK-REG-NEXT: xxsel v2, v3, v2, vs0 +; CHECK-REG-NEXT: xvcmpgtsp vs1, v4, v5 +; CHECK-REG-NEXT: xxlor vs0, vs1, vs0 +; CHECK-REG-NEXT: xxsel v2, v2, v3, vs0 ; CHECK-REG-NEXT: blr ; ; CHECK-FISL-LABEL: test22: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xvcmpgtsp vs1, v5, v4 ; CHECK-FISL-NEXT: xvcmpgtsp vs0, v4, v5 -; CHECK-FISL-NEXT: xxlnor vs0, vs0, vs0 -; CHECK-FISL-NEXT: xxlandc vs0, vs0, vs1 -; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 +; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1 +; CHECK-FISL-NEXT: xxsel v2, v2, v3, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test22: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5 ; CHECK-LE-NEXT: xvcmpgtsp vs0, v5, v4 -; CHECK-LE-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-LE-NEXT: xxlandc vs0, vs1, vs0 -; CHECK-LE-NEXT: xxsel v2, v3, v2, vs0 +; CHECK-LE-NEXT: xvcmpgtsp vs1, v4, v5 +; CHECK-LE-NEXT: xxlor vs0, vs1, vs0 +; CHECK-LE-NEXT: xxsel v2, v2, v3, vs0 ; CHECK-LE-NEXT: blr entry: %m = fcmp ueq <4 x float> %c, %d From 3a90a6990a0970b81fd30e67400074515bc2dbb2 Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Sat, 18 Oct 2025 06:22:21 +0000 Subject: [PATCH 09/20] Revert "[DAG]: Updated tests" This reverts commit cef0067d84a41aad176352f02d9585dd71355a71. --- llvm/test/CodeGen/X86/andnot-patterns.ll | 184 +++++++++-------------- 1 file changed, 68 insertions(+), 116 deletions(-) diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll index 0701d7046fc35..fc573fbd4fc99 100644 --- a/llvm/test/CodeGen/X86/andnot-patterns.ll +++ b/llvm/test/CodeGen/X86/andnot-patterns.ll @@ -761,7 +761,6 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { ; ; X86-BMI-LABEL: andnot_bitreverse_i64: ; X86-BMI: # %bb.0: -; X86-BMI-NEXT: pushl %esi ; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-BMI-NEXT: bswapl %eax @@ -775,16 +774,13 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { ; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333 ; X86-BMI-NEXT: shrl $2, %eax ; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X86-BMI-NEXT: leal (%eax,%edx,4), %esi -; X86-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 -; X86-BMI-NEXT: addl %esi, %esi -; X86-BMI-NEXT: shll $2, %edx -; X86-BMI-NEXT: notl %edx -; X86-BMI-NEXT: andnl %edx, %eax, %eax +; X86-BMI-NEXT: leal (%eax,%edx,4), %eax +; X86-BMI-NEXT: movl %eax, %edx +; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555 ; X86-BMI-NEXT: shrl %eax -; X86-BMI-NEXT: orl $-1431655766, %eax # imm = 0xAAAAAAAA -; X86-BMI-NEXT: andnl %eax, %esi, %eax -; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X86-BMI-NEXT: leal (%eax,%edx,2), %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax ; X86-BMI-NEXT: bswapl %ecx ; X86-BMI-NEXT: movl %ecx, %edx ; X86-BMI-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F @@ -796,17 +792,13 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { ; X86-BMI-NEXT: andl $858993459, %edx # imm = 0x33333333 ; X86-BMI-NEXT: shrl $2, %ecx ; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333 -; 
X86-BMI-NEXT: leal (%ecx,%edx,4), %esi -; X86-BMI-NEXT: andl $1431655765, %esi # imm = 0x55555555 -; X86-BMI-NEXT: addl %esi, %esi -; X86-BMI-NEXT: shll $2, %edx -; X86-BMI-NEXT: notl %edx -; X86-BMI-NEXT: andnl %edx, %ecx, %ecx +; X86-BMI-NEXT: leal (%ecx,%edx,4), %ecx +; X86-BMI-NEXT: movl %ecx, %edx +; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555 ; X86-BMI-NEXT: shrl %ecx -; X86-BMI-NEXT: orl $-1431655766, %ecx # imm = 0xAAAAAAAA -; X86-BMI-NEXT: andnl %ecx, %esi, %edx -; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI-NEXT: popl %esi +; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X86-BMI-NEXT: leal (%ecx,%edx,2), %ecx +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %ecx, %edx ; X86-BMI-NEXT: retl ; ; X64-NOBMI-LABEL: andnot_bitreverse_i64: @@ -845,23 +837,19 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind { ; X64-BMI-NEXT: andq %rcx, %rsi ; X64-BMI-NEXT: shlq $4, %rsi ; X64-BMI-NEXT: orq %rax, %rsi -; X64-BMI-NEXT: movq %rsi, %rax -; X64-BMI-NEXT: shrq $2, %rax -; X64-BMI-NEXT: movabsq $3689348814741910323, %rcx # imm = 0x3333333333333333 -; X64-BMI-NEXT: andq %rcx, %rax -; X64-BMI-NEXT: andq %rcx, %rsi -; X64-BMI-NEXT: leaq (,%rsi,4), %rcx -; X64-BMI-NEXT: notq %rcx -; X64-BMI-NEXT: andnq %rcx, %rax, %rcx -; X64-BMI-NEXT: shrq %rcx -; X64-BMI-NEXT: movabsq $-6148914691236517206, %rdx # imm = 0xAAAAAAAAAAAAAAAA -; X64-BMI-NEXT: orq %rcx, %rdx -; X64-BMI-NEXT: leaq (%rax,%rsi,4), %rax -; X64-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-BMI-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; X64-BMI-NEXT: movq %rsi, %rcx ; X64-BMI-NEXT: andq %rax, %rcx -; X64-BMI-NEXT: addq %rcx, %rcx -; X64-BMI-NEXT: andnq %rdx, %rcx, %rax -; X64-BMI-NEXT: andq %rdi, %rax +; X64-BMI-NEXT: shrq $2, %rsi +; X64-BMI-NEXT: andq %rax, %rsi +; X64-BMI-NEXT: leaq (%rsi,%rcx,4), %rax +; X64-BMI-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-BMI-NEXT: movq %rax, %rdx +; X64-BMI-NEXT: andq %rcx, %rdx +; X64-BMI-NEXT: shrq %rax +; X64-BMI-NEXT: andq %rcx, %rax +; X64-BMI-NEXT: leaq (%rax,%rdx,2), %rax +; X64-BMI-NEXT: andnq %rdi, %rax, %rax ; X64-BMI-NEXT: retq %not = xor i64 %a1, -1 %bitrev = tail call i64 @llvm.bitreverse.i64(i64 %not) @@ -908,16 +896,13 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind { ; X86-BMI-NEXT: andl $858993459, %ecx # imm = 0x33333333 ; X86-BMI-NEXT: shrl $2, %eax ; X86-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333 -; X86-BMI-NEXT: leal (%eax,%ecx,4), %edx -; X86-BMI-NEXT: andl $1431655765, %edx # imm = 0x55555555 -; X86-BMI-NEXT: addl %edx, %edx -; X86-BMI-NEXT: shll $2, %ecx -; X86-BMI-NEXT: notl %ecx -; X86-BMI-NEXT: andnl %ecx, %eax, %eax +; X86-BMI-NEXT: leal (%eax,%ecx,4), %eax +; X86-BMI-NEXT: movl %eax, %ecx +; X86-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 ; X86-BMI-NEXT: shrl %eax -; X86-BMI-NEXT: orl $-1431655766, %eax # imm = 0xAAAAAAAA -; X86-BMI-NEXT: andnl %eax, %edx, %eax -; X86-BMI-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X86-BMI-NEXT: leal (%eax,%ecx,2), %eax +; X86-BMI-NEXT: andnl {{[0-9]+}}(%esp), %eax, %eax ; X86-BMI-NEXT: retl ; ; X64-NOBMI-LABEL: andnot_bitreverse_i32: @@ -955,19 +940,16 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind { ; X64-BMI-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F ; X64-BMI-NEXT: orl %eax, %esi ; X64-BMI-NEXT: movl %esi, %eax -; X64-BMI-NEXT: shrl $2, %eax ; X64-BMI-NEXT: andl $858993459, %eax # imm = 0x33333333 +; 
X64-BMI-NEXT: shrl $2, %esi ; X64-BMI-NEXT: andl $858993459, %esi # imm = 0x33333333 -; X64-BMI-NEXT: leal (,%rsi,4), %ecx -; X64-BMI-NEXT: notl %ecx -; X64-BMI-NEXT: andnl %ecx, %eax, %ecx -; X64-BMI-NEXT: shrl %ecx -; X64-BMI-NEXT: orl $-1431655766, %ecx # imm = 0xAAAAAAAA -; X64-BMI-NEXT: leal (%rax,%rsi,4), %eax +; X64-BMI-NEXT: leal (%rsi,%rax,4), %eax +; X64-BMI-NEXT: movl %eax, %ecx +; X64-BMI-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X64-BMI-NEXT: shrl %eax ; X64-BMI-NEXT: andl $1431655765, %eax # imm = 0x55555555 -; X64-BMI-NEXT: addl %eax, %eax -; X64-BMI-NEXT: andnl %ecx, %eax, %eax -; X64-BMI-NEXT: andl %edi, %eax +; X64-BMI-NEXT: leal (%rax,%rcx,2), %eax +; X64-BMI-NEXT: andnl %edi, %eax, %eax ; X64-BMI-NEXT: retq %not = xor i32 %a1, -1 %bitrev = tail call i32 @llvm.bitreverse.i32(i32 %not) @@ -976,57 +958,30 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind { } define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind { -; X86-NOBMI-LABEL: andnot_bitreverse_i16: -; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: rolw $8, %ax -; X86-NOBMI-NEXT: movl %eax, %ecx -; X86-NOBMI-NEXT: andl $3855, %ecx # imm = 0xF0F -; X86-NOBMI-NEXT: shll $4, %ecx -; X86-NOBMI-NEXT: shrl $4, %eax -; X86-NOBMI-NEXT: andl $3855, %eax # imm = 0xF0F -; X86-NOBMI-NEXT: orl %ecx, %eax -; X86-NOBMI-NEXT: movl %eax, %ecx -; X86-NOBMI-NEXT: andl $13107, %ecx # imm = 0x3333 -; X86-NOBMI-NEXT: shrl $2, %eax -; X86-NOBMI-NEXT: andl $13107, %eax # imm = 0x3333 -; X86-NOBMI-NEXT: leal (%eax,%ecx,4), %eax -; X86-NOBMI-NEXT: movl %eax, %ecx -; X86-NOBMI-NEXT: andl $21845, %ecx # imm = 0x5555 -; X86-NOBMI-NEXT: shrl %eax -; X86-NOBMI-NEXT: andl $21845, %eax # imm = 0x5555 -; X86-NOBMI-NEXT: leal (%eax,%ecx,2), %eax -; X86-NOBMI-NEXT: notl %eax -; X86-NOBMI-NEXT: andw {{[0-9]+}}(%esp), %ax -; X86-NOBMI-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NOBMI-NEXT: retl -; -; X86-BMI-LABEL: andnot_bitreverse_i16: -; X86-BMI: # %bb.0: -; X86-BMI-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-BMI-NEXT: rolw $8, %ax -; X86-BMI-NEXT: movl %eax, %ecx -; X86-BMI-NEXT: andl $3855, %ecx # imm = 0xF0F -; X86-BMI-NEXT: shll $4, %ecx -; X86-BMI-NEXT: shrl $4, %eax -; X86-BMI-NEXT: andl $3855, %eax # imm = 0xF0F -; X86-BMI-NEXT: orl %ecx, %eax -; X86-BMI-NEXT: movl %eax, %ecx -; X86-BMI-NEXT: andl $13107, %ecx # imm = 0x3333 -; X86-BMI-NEXT: shrl $2, %eax -; X86-BMI-NEXT: andl $13107, %eax # imm = 0x3333 -; X86-BMI-NEXT: leal (%eax,%ecx,4), %edx -; X86-BMI-NEXT: andl $21845, %edx # imm = 0x5555 -; X86-BMI-NEXT: addl %edx, %edx -; X86-BMI-NEXT: shll $2, %ecx -; X86-BMI-NEXT: notl %ecx -; X86-BMI-NEXT: andnl %ecx, %eax, %eax -; X86-BMI-NEXT: shrl %eax -; X86-BMI-NEXT: orl $43690, %eax # imm = 0xAAAA -; X86-BMI-NEXT: andnl %eax, %edx, %eax -; X86-BMI-NEXT: andw {{[0-9]+}}(%esp), %ax -; X86-BMI-NEXT: # kill: def $ax killed $ax killed $eax -; X86-BMI-NEXT: retl +; X86-LABEL: andnot_bitreverse_i16: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: rolw $8, %ax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $3855, %ecx # imm = 0xF0F +; X86-NEXT: shll $4, %ecx +; X86-NEXT: shrl $4, %eax +; X86-NEXT: andl $3855, %eax # imm = 0xF0F +; X86-NEXT: orl %ecx, %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $13107, %ecx # imm = 0x3333 +; X86-NEXT: shrl $2, %eax +; X86-NEXT: andl $13107, %eax # imm = 0x3333 +; X86-NEXT: leal (%eax,%ecx,4), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andl $21845, %ecx # imm = 0x5555 +; X86-NEXT: shrl %eax +; X86-NEXT: andl 
$21845, %eax # imm = 0x5555
+; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
 ;
 ; X64-NOBMI-LABEL: andnot_bitreverse_i16:
 ; X64-NOBMI:       # %bb.0:
@@ -1064,19 +1019,16 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ; X64-BMI-NEXT:    andl $3855, %esi # imm = 0xF0F
 ; X64-BMI-NEXT:    orl %eax, %esi
 ; X64-BMI-NEXT:    movl %esi, %eax
-; X64-BMI-NEXT:    shrl $2, %eax
 ; X64-BMI-NEXT:    andl $13107, %eax # imm = 0x3333
+; X64-BMI-NEXT:    shrl $2, %esi
 ; X64-BMI-NEXT:    andl $13107, %esi # imm = 0x3333
-; X64-BMI-NEXT:    leal (,%rsi,4), %ecx
-; X64-BMI-NEXT:    notl %ecx
-; X64-BMI-NEXT:    andnl %ecx, %eax, %ecx
-; X64-BMI-NEXT:    shrl %ecx
-; X64-BMI-NEXT:    orl $-21846, %ecx # imm = 0xAAAA
-; X64-BMI-NEXT:    leal (%rax,%rsi,4), %eax
+; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT:    movl %eax, %ecx
+; X64-BMI-NEXT:    andl $21845, %ecx # imm = 0x5555
+; X64-BMI-NEXT:    shrl %eax
 ; X64-BMI-NEXT:    andl $21845, %eax # imm = 0x5555
-; X64-BMI-NEXT:    addl %eax, %eax
-; X64-BMI-NEXT:    andnl %ecx, %eax, %eax
-; X64-BMI-NEXT:    andl %edi, %eax
+; X64-BMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-BMI-NEXT:    andnl %edi, %eax, %eax
 ; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-BMI-NEXT:    retq
   %not = xor i16 %a1, -1

From 1245b6b33393488eab867affc2b8c59693c26ecf Mon Sep 17 00:00:00 2001
From: Kevin Per
Date: Sat, 18 Oct 2025 06:53:43 +0000
Subject: [PATCH 10/20] [DAG]: Rewrite `~(a | b | c)` into `~a & ~b & ~c`

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5b77dc423b66b..fba8b62f5ca35 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -10197,20 +10197,26 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     }
   }
 
-  // fold (not (or A, B)) -> and(not(A), not(B))
+  // fold (not (or A, or(B, C))) -> and(not(A), and(not(B), not(C)))
   if (TLI.hasAndNot(SDValue(N, 0))) {
     // If we have AndNot then it is profitable to apply De Morgan to make use
     // of the machine instruction.
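    // For illustration (scalar i32; the concrete sequence mirrors the BMI
    // test expectations added later in this series):
    //   ~(a | (b | c))  ==>  ~a & (~b & ~c)
    // which lowers to one NOT feeding a chain of ANDNs, e.g.
    //   notl  %edi
    //   andnl %edi, %esi, %eax
    //   andnl %eax, %edx, %eax
    // instead of two ORs and a NOT.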
SDValue A; SDValue B; + SDValue C; APInt Cst; - if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Value(B)), m_ConstInt(Cst))) && + if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Or(m_Value(B), m_Value(C))), m_ConstInt(Cst))) && Cst.isAllOnes()) { auto Ty = N->getValueType(0); + + auto NegA = DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty)); + auto NegB = DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty)); + auto NegC = DAG.getNode(ISD::XOR, DL, VT, C, DAG.getConstant(Cst, DL, Ty)); + return DAG.getNode( ISD::AND, DL, VT, - DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty)), - DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty))); + NegA, + DAG.getNode(ISD::AND, DL, VT, NegB, NegC)); } } From 746b101885509bddc2f6ad3460a09eaf61de99ae Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Sat, 18 Oct 2025 06:54:42 +0000 Subject: [PATCH 11/20] [X86]: Created new test --- llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll | 171 ++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll diff --git a/llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll b/llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll new file mode 100644 index 0000000000000..a1ace1b6ca157 --- /dev/null +++ b/llvm/test/CodeGen/X86/bmi-rewrite-demorgan.ll @@ -0,0 +1,171 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86-WITH-BMI +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64-WITH-BMI +; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-WITHOUT-BMI +; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-WITHOUT-BMI + +define i32 @not_rewrite_demorgan_i32(i32 %a, i32 %b) nounwind { +; X86-WITH-BMI-LABEL: not_rewrite_demorgan_i32: +; X86-WITH-BMI: # %bb.0: +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: notl %eax +; X86-WITH-BMI-NEXT: retl +; +; X64-WITH-BMI-LABEL: not_rewrite_demorgan_i32: +; X64-WITH-BMI: # %bb.0: +; X64-WITH-BMI-NEXT: movl %edi, %eax +; X64-WITH-BMI-NEXT: orl %esi, %eax +; X64-WITH-BMI-NEXT: notl %eax +; X64-WITH-BMI-NEXT: retq +; +; X86-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i32: +; X86-WITHOUT-BMI: # %bb.0: +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: notl %eax +; X86-WITHOUT-BMI-NEXT: retl +; +; X64-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i32: +; X64-WITHOUT-BMI: # %bb.0: +; X64-WITHOUT-BMI-NEXT: movl %edi, %eax +; X64-WITHOUT-BMI-NEXT: orl %esi, %eax +; X64-WITHOUT-BMI-NEXT: notl %eax +; X64-WITHOUT-BMI-NEXT: retq + %temp = or i32 %b, %a + %res = xor i32 %temp, -1 + ret i32 %res +} + +define i32 @rewrite_demorgan_i32(i32 %a, i32 %b, i32 %c) nounwind { +; X86-WITH-BMI-LABEL: rewrite_demorgan_i32: +; X86-WITH-BMI: # %bb.0: +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITH-BMI-NEXT: notl %edx +; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %ecx +; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax +; X86-WITH-BMI-NEXT: retl +; +; X64-WITH-BMI-LABEL: rewrite_demorgan_i32: +; X64-WITH-BMI: # %bb.0: +; X64-WITH-BMI-NEXT: notl %edi +; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax +; X64-WITH-BMI-NEXT: andnl %eax, %edx, %eax +; X64-WITH-BMI-NEXT: retq +; +; X86-WITHOUT-BMI-LABEL: 
rewrite_demorgan_i32: +; X86-WITHOUT-BMI: # %bb.0: +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: notl %eax +; X86-WITHOUT-BMI-NEXT: retl +; +; X64-WITHOUT-BMI-LABEL: rewrite_demorgan_i32: +; X64-WITHOUT-BMI: # %bb.0: +; X64-WITHOUT-BMI-NEXT: movl %edi, %eax +; X64-WITHOUT-BMI-NEXT: orl %esi, %eax +; X64-WITHOUT-BMI-NEXT: orl %edx, %eax +; X64-WITHOUT-BMI-NEXT: notl %eax +; X64-WITHOUT-BMI-NEXT: retq + %and.demorgan = or i32 %b, %a + %and3.demorgan = or i32 %and.demorgan, %c + %and3 = xor i32 %and3.demorgan, -1 + ret i32 %and3 +} + +define i64 @not_rewrite_demorgan_i64(i64 %a, i64 %b) nounwind { +; X86-WITH-BMI-LABEL: not_rewrite_demorgan_i64: +; X86-WITH-BMI: # %bb.0: +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITH-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-WITH-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: notl %eax +; X86-WITH-BMI-NEXT: notl %edx +; X86-WITH-BMI-NEXT: retl +; +; X64-WITH-BMI-LABEL: not_rewrite_demorgan_i64: +; X64-WITH-BMI: # %bb.0: +; X64-WITH-BMI-NEXT: movq %rdi, %rax +; X64-WITH-BMI-NEXT: orq %rsi, %rax +; X64-WITH-BMI-NEXT: notq %rax +; X64-WITH-BMI-NEXT: retq +; +; X86-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i64: +; X86-WITHOUT-BMI: # %bb.0: +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: notl %eax +; X86-WITHOUT-BMI-NEXT: notl %edx +; X86-WITHOUT-BMI-NEXT: retl +; +; X64-WITHOUT-BMI-LABEL: not_rewrite_demorgan_i64: +; X64-WITHOUT-BMI: # %bb.0: +; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax +; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax +; X64-WITHOUT-BMI-NEXT: notq %rax +; X64-WITHOUT-BMI-NEXT: retq + %temp = or i64 %b, %a + %res = xor i64 %temp, -1 + ret i64 %res +} + +define i64 @rewrite_demorgan_i64(i64 %a, i64 %b, i64 %c) nounwind { +; X86-WITH-BMI-LABEL: rewrite_demorgan_i64: +; X86-WITH-BMI: # %bb.0: +; X86-WITH-BMI-NEXT: pushl %ebx +; X86-WITH-BMI-NEXT: pushl %edi +; X86-WITH-BMI-NEXT: pushl %esi +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-WITH-BMI-NEXT: notl %edi +; X86-WITH-BMI-NEXT: andnl %edi, %edx, %edx +; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax +; X86-WITH-BMI-NEXT: notl %ebx +; X86-WITH-BMI-NEXT: andnl %ebx, %esi, %edx +; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %edx +; X86-WITH-BMI-NEXT: popl %esi +; X86-WITH-BMI-NEXT: popl %edi +; X86-WITH-BMI-NEXT: popl %ebx +; X86-WITH-BMI-NEXT: retl +; +; X64-WITH-BMI-LABEL: rewrite_demorgan_i64: +; X64-WITH-BMI: # %bb.0: +; X64-WITH-BMI-NEXT: notq %rdi +; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax +; X64-WITH-BMI-NEXT: andnq %rax, %rdx, %rax +; X64-WITH-BMI-NEXT: retq +; +; X86-WITHOUT-BMI-LABEL: rewrite_demorgan_i64: +; X86-WITHOUT-BMI: # %bb.0: +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx +; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax +; 
X86-WITHOUT-BMI-NEXT: notl %eax +; X86-WITHOUT-BMI-NEXT: notl %edx +; X86-WITHOUT-BMI-NEXT: retl +; +; X64-WITHOUT-BMI-LABEL: rewrite_demorgan_i64: +; X64-WITHOUT-BMI: # %bb.0: +; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax +; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax +; X64-WITHOUT-BMI-NEXT: orq %rdx, %rax +; X64-WITHOUT-BMI-NEXT: notq %rax +; X64-WITHOUT-BMI-NEXT: retq + %and.demorgan = or i64 %b, %a + %and3.demorgan = or i64 %and.demorgan, %c + %and3 = xor i64 %and3.demorgan, -1 + ret i64 %and3 +} From f237020a2005d5b42cc32e0849eacd5ba806ff2f Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Sat, 18 Oct 2025 06:55:33 +0000 Subject: [PATCH 12/20] [DAG]: Run fmt --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fba8b62f5ca35..fbc63d8eb6d40 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10205,18 +10205,20 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue B; SDValue C; APInt Cst; - if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Or(m_Value(B), m_Value(C))), m_ConstInt(Cst))) && + if (sd_match(N, m_Xor(m_Or(m_Value(A), m_Or(m_Value(B), m_Value(C))), + m_ConstInt(Cst))) && Cst.isAllOnes()) { auto Ty = N->getValueType(0); - auto NegA = DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty)); - auto NegB = DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty)); - auto NegC = DAG.getNode(ISD::XOR, DL, VT, C, DAG.getConstant(Cst, DL, Ty)); + auto NegA = + DAG.getNode(ISD::XOR, DL, VT, A, DAG.getConstant(Cst, DL, Ty)); + auto NegB = + DAG.getNode(ISD::XOR, DL, VT, B, DAG.getConstant(Cst, DL, Ty)); + auto NegC = + DAG.getNode(ISD::XOR, DL, VT, C, DAG.getConstant(Cst, DL, Ty)); - return DAG.getNode( - ISD::AND, DL, VT, - NegA, - DAG.getNode(ISD::AND, DL, VT, NegB, NegC)); + return DAG.getNode(ISD::AND, DL, VT, NegA, + DAG.getNode(ISD::AND, DL, VT, NegB, NegC)); } } From 956b849a31b714a235ad160558f1e3ff69f5f363 Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Sat, 18 Oct 2025 06:55:51 +0000 Subject: [PATCH 13/20] [AArch64]: Updated tests --- llvm/test/CodeGen/AArch64/bsl.ll | 120 ++++++++------- .../CodeGen/AArch64/build-vector-dup-simd.ll | 24 +-- llvm/test/CodeGen/AArch64/ctlz.ll | 139 +++++++++++------- llvm/test/CodeGen/AArch64/eon.ll | 22 ++- .../CodeGen/AArch64/fp16-v4-instructions.ll | 44 ++++-- .../CodeGen/AArch64/fp16-v8-instructions.ll | 50 +++++-- llvm/test/CodeGen/AArch64/sve2-bsl.ll | 36 +++-- 7 files changed, 258 insertions(+), 177 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll index df6b6f75b8935..fcf7393d2c801 100644 --- a/llvm/test/CodeGen/AArch64/bsl.ll +++ b/llvm/test/CodeGen/AArch64/bsl.ll @@ -32,17 +32,19 @@ define <1 x i64> @bsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { define <1 x i64> @nbsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { ; NEON-LABEL: nbsl_v1i64: ; NEON: // %bb.0: -; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: bic v1.8b, v1.8b, v2.8b ; NEON-NEXT: mvn v0.8b, v0.8b +; NEON-NEXT: bic v0.8b, v0.8b, v1.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v1i64: ; SVE2: // %bb.0: ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 -; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 -; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d -; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: 
bic v1.8b, v1.8b, v2.8b +; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d +; SVE2-NEXT: bic v0.8b, v0.8b, v1.8b ; SVE2-NEXT: ret %4 = and <1 x i64> %2, %0 %5 = xor <1 x i64> %2, splat (i64 -1) @@ -78,9 +80,8 @@ define <1 x i64> @bsl1n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { define <1 x i64> @bsl2n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { ; NEON-LABEL: bsl2n_v1i64: ; NEON: // %bb.0: -; NEON-NEXT: and v0.8b, v2.8b, v0.8b -; NEON-NEXT: orr v1.8b, v2.8b, v1.8b -; NEON-NEXT: orn v0.8b, v0.8b, v1.8b +; NEON-NEXT: mvn v1.8b, v1.8b +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: bsl2n_v1i64: @@ -118,17 +119,19 @@ define <2 x i64> @bsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { define <2 x i64> @nbsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { ; NEON-LABEL: nbsl_v2i64: ; NEON: // %bb.0: -; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: and v0.16b, v2.16b, v0.16b +; NEON-NEXT: bic v1.16b, v1.16b, v2.16b ; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: bic v0.16b, v0.16b, v1.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v2i64: ; SVE2: // %bb.0: ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 -; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 -; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d -; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: bic v1.16b, v1.16b, v2.16b +; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d +; SVE2-NEXT: bic v0.16b, v0.16b, v1.16b ; SVE2-NEXT: ret %4 = and <2 x i64> %2, %0 %5 = xor <2 x i64> %2, splat (i64 -1) @@ -164,9 +167,8 @@ define <2 x i64> @bsl1n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { ; NEON-LABEL: bsl2n_v2i64: ; NEON: // %bb.0: -; NEON-NEXT: and v0.16b, v2.16b, v0.16b -; NEON-NEXT: orr v1.16b, v2.16b, v1.16b -; NEON-NEXT: orn v0.16b, v0.16b, v1.16b +; NEON-NEXT: mvn v1.16b, v1.16b +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: bsl2n_v2i64: @@ -189,17 +191,18 @@ define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) { ; NEON-LABEL: nbsl_v8i8: ; NEON: // %bb.0: -; NEON-NEXT: bif v0.8b, v1.8b, v2.8b -; NEON-NEXT: mvn v0.8b, v0.8b +; NEON-NEXT: and v3.8b, v2.8b, v1.8b +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: orn v1.8b, v3.8b, v1.8b +; NEON-NEXT: bic v0.8b, v1.8b, v0.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v8i8: ; SVE2: // %bb.0: -; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 -; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 -; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d -; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: and v3.8b, v2.8b, v1.8b +; SVE2-NEXT: and v0.8b, v2.8b, v0.8b +; SVE2-NEXT: orn v1.8b, v3.8b, v1.8b +; SVE2-NEXT: bic v0.8b, v1.8b, v0.8b ; SVE2-NEXT: ret %4 = and <8 x i8> %2, %0 %5 = xor <8 x i8> %2, splat (i8 -1) @@ -212,17 +215,18 @@ define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) { define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) { ; NEON-LABEL: nbsl_v4i16: ; NEON: // %bb.0: -; NEON-NEXT: bif v0.8b, v1.8b, v2.8b -; NEON-NEXT: mvn v0.8b, v0.8b +; NEON-NEXT: and v3.8b, v2.8b, v1.8b +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: orn v1.8b, v3.8b, v1.8b +; NEON-NEXT: bic v0.8b, v1.8b, v0.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v4i16: ; SVE2: // %bb.0: -; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 
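; NOTE: each nbsl_* body in this file computes a negated bitwise select,
;   ~((m & a) | (~m & b))
; where m is the mask operand (%2). The regenerated checks reflect the
; generic De Morgan rewrite, which now expands this pattern instead of
; leaving the single NBSL (SVE2) or BIF+MVN (NEON) lowering in place.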
-; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 -; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d -; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: and v3.8b, v2.8b, v1.8b +; SVE2-NEXT: and v0.8b, v2.8b, v0.8b +; SVE2-NEXT: orn v1.8b, v3.8b, v1.8b +; SVE2-NEXT: bic v0.8b, v1.8b, v0.8b ; SVE2-NEXT: ret %4 = and <4 x i16> %2, %0 %5 = xor <4 x i16> %2, splat (i16 -1) @@ -235,17 +239,19 @@ define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) { define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) { ; NEON-LABEL: nbsl_v2i32: ; NEON: // %bb.0: -; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: bic v1.8b, v1.8b, v2.8b ; NEON-NEXT: mvn v0.8b, v0.8b +; NEON-NEXT: bic v0.8b, v0.8b, v1.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v2i32: ; SVE2: // %bb.0: ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 -; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 -; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d -; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 +; SVE2-NEXT: bic v1.8b, v1.8b, v2.8b +; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d +; SVE2-NEXT: bic v0.8b, v0.8b, v1.8b ; SVE2-NEXT: ret %4 = and <2 x i32> %2, %0 %5 = xor <2 x i32> %2, splat (i32 -1) @@ -258,17 +264,18 @@ define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) { define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; NEON-LABEL: nbsl_v16i8: ; NEON: // %bb.0: -; NEON-NEXT: bif v0.16b, v1.16b, v2.16b -; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: and v3.16b, v2.16b, v1.16b +; NEON-NEXT: and v0.16b, v2.16b, v0.16b +; NEON-NEXT: orn v1.16b, v3.16b, v1.16b +; NEON-NEXT: bic v0.16b, v1.16b, v0.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v16i8: ; SVE2: // %bb.0: -; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 -; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 -; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 -; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d -; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: and v3.16b, v2.16b, v1.16b +; SVE2-NEXT: and v0.16b, v2.16b, v0.16b +; SVE2-NEXT: orn v1.16b, v3.16b, v1.16b +; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b ; SVE2-NEXT: ret %4 = and <16 x i8> %2, %0 %5 = xor <16 x i8> %2, splat (i8 -1) @@ -281,17 +288,18 @@ define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { ; NEON-LABEL: nbsl_v8i16: ; NEON: // %bb.0: -; NEON-NEXT: bif v0.16b, v1.16b, v2.16b -; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: and v3.16b, v2.16b, v1.16b +; NEON-NEXT: and v0.16b, v2.16b, v0.16b +; NEON-NEXT: orn v1.16b, v3.16b, v1.16b +; NEON-NEXT: bic v0.16b, v1.16b, v0.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v8i16: ; SVE2: // %bb.0: -; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 -; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 -; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 -; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d -; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: and v3.16b, v2.16b, v1.16b +; SVE2-NEXT: and v0.16b, v2.16b, v0.16b +; SVE2-NEXT: orn v1.16b, v3.16b, v1.16b +; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b ; SVE2-NEXT: ret %4 = and <8 x i16> %2, %0 %5 = xor <8 x i16> %2, splat (i16 -1) @@ -304,17 +312,19 @@ define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { define <4 x i32> @nbsl_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) { ; NEON-LABEL: nbsl_v4i32: ; NEON: // %bb.0: -; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: and 
v0.16b, v2.16b, v0.16b +; NEON-NEXT: bic v1.16b, v1.16b, v2.16b ; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: bic v0.16b, v0.16b, v1.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v4i32: ; SVE2: // %bb.0: ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 -; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 -; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d -; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: bic v1.16b, v1.16b, v2.16b +; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d +; SVE2-NEXT: bic v0.16b, v0.16b, v1.16b ; SVE2-NEXT: ret %4 = and <4 x i32> %2, %0 %5 = xor <4 x i32> %2, splat (i32 -1) @@ -471,16 +481,14 @@ define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 { define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 { ; NEON-LABEL: nor_q: ; NEON: // %bb.0: -; NEON-NEXT: orr v0.16b, v1.16b, v0.16b -; NEON-NEXT: mvn v0.16b, v0.16b +; NEON-NEXT: mvn v1.16b, v1.16b +; NEON-NEXT: bic v0.16b, v1.16b, v0.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nor_q: ; SVE2: // %bb.0: -; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 -; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 -; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z0.d -; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 +; SVE2-NEXT: mvn v1.16b, v1.16b +; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b ; SVE2-NEXT: ret %3 = or <2 x i64> %1, %0 %4 = xor <2 x i64> %3, splat (i64 -1) diff --git a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll index ac0b8e89519dd..af7f9b6d471ad 100644 --- a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll @@ -117,10 +117,10 @@ entry: define <1 x float> @dup_v1i32_ueq(float %a, float %b) { ; CHECK-NOFULLFP16-LABEL: dup_v1i32_ueq: ; CHECK-NOFULLFP16: // %bb.0: // %entry -; CHECK-NOFULLFP16-NEXT: fcmgt s2, s0, s1 -; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0 -; CHECK-NOFULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b +; CHECK-NOFULLFP16-NEXT: fcmgt s2, s1, s0 +; CHECK-NOFULLFP16-NEXT: fcmgt s0, s0, s1 +; CHECK-NOFULLFP16-NEXT: mvn v1.8b, v2.8b +; CHECK-NOFULLFP16-NEXT: bic v0.8b, v1.8b, v0.8b ; CHECK-NOFULLFP16-NEXT: ret ; ; CHECK-NONANS-LABEL: dup_v1i32_ueq: @@ -130,10 +130,10 @@ define <1 x float> @dup_v1i32_ueq(float %a, float %b) { ; ; CHECK-FULLFP16-LABEL: dup_v1i32_ueq: ; CHECK-FULLFP16: // %bb.0: // %entry -; CHECK-FULLFP16-NEXT: fcmgt s2, s0, s1 -; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0 -; CHECK-FULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b +; CHECK-FULLFP16-NEXT: fcmgt s2, s1, s0 +; CHECK-FULLFP16-NEXT: fcmgt s0, s0, s1 +; CHECK-FULLFP16-NEXT: mvn v1.8b, v2.8b +; CHECK-FULLFP16-NEXT: bic v0.8b, v1.8b, v0.8b ; CHECK-FULLFP16-NEXT: ret entry: %0 = fcmp ueq float %a, %b @@ -260,10 +260,10 @@ entry: define <1 x float> @dup_v1i32_uno(float %a, float %b) { ; CHECK-LABEL: dup_v1i32_uno: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmge s2, s0, s1 -; CHECK-NEXT: fcmgt s0, s1, s0 -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: fcmgt s2, s1, s0 +; CHECK-NEXT: fcmge s0, s0, s1 +; CHECK-NEXT: mvn v1.8b, v2.8b +; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret entry: %0 = fcmp uno float %a, %b diff --git a/llvm/test/CodeGen/AArch64/ctlz.ll b/llvm/test/CodeGen/AArch64/ctlz.ll index 04124609eec74..f459cc2d78442 100644 --- a/llvm/test/CodeGen/AArch64/ctlz.ll +++ b/llvm/test/CodeGen/AArch64/ctlz.ll @@ -276,18 +276,23 @@ define <2 x i64> @v2i64(<2 x i64> %d) { ; 
CHECK-SD-LABEL: v2i64: ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #1 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #2 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #4 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #8 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #16 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #32 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: orr v2.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: mvn v0.16b, v0.16b +; CHECK-SD-NEXT: ushr v3.2d, v2.2d, #2 +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: orr v2.16b, v2.16b, v3.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v3.16b +; CHECK-SD-NEXT: ushr v4.2d, v2.2d, #4 +; CHECK-SD-NEXT: orr v2.16b, v2.16b, v4.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: ushr v1.2d, v2.2d, #8 +; CHECK-SD-NEXT: orr v2.16b, v2.16b, v1.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ushr v3.2d, v2.2d, #16 +; CHECK-SD-NEXT: orr v1.16b, v2.16b, v3.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v3.16b +; CHECK-SD-NEXT: ushr v1.2d, v1.2d, #32 +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: cnt v0.16b, v0.16b ; CHECK-SD-NEXT: uaddlp v0.8h, v0.16b ; CHECK-SD-NEXT: uaddlp v0.4s, v0.8h @@ -314,34 +319,44 @@ define <3 x i64> @v3i64(<3 x i64> %d) { ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2 +; CHECK-SD-NEXT: ushr v4.2d, v2.2d, #1 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0] +; CHECK-SD-NEXT: orr v6.16b, v2.16b, v4.16b +; CHECK-SD-NEXT: mvn v2.16b, v2.16b ; CHECK-SD-NEXT: ushr v1.2d, v0.2d, #1 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-SD-NEXT: ushr v1.2d, v2.2d, #1 -; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #2 -; CHECK-SD-NEXT: orr v1.16b, v2.16b, v1.16b -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #2 -; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #4 -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #4 -; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #8 -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #8 -; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #16 -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #16 -; CHECK-SD-NEXT: ushr v3.2d, v0.2d, #32 -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v1.2d, #32 +; CHECK-SD-NEXT: ushr v7.2d, v6.2d, #2 +; CHECK-SD-NEXT: bic v2.16b, v2.16b, v4.16b +; CHECK-SD-NEXT: orr v3.16b, v0.16b, v1.16b ; CHECK-SD-NEXT: mvn v0.16b, v0.16b -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v2.16b +; CHECK-SD-NEXT: orr v6.16b, v6.16b, v7.16b +; CHECK-SD-NEXT: bic v2.16b, v2.16b, v7.16b +; CHECK-SD-NEXT: ushr v5.2d, v3.2d, #2 +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ushr v17.2d, v6.2d, #4 +; CHECK-SD-NEXT: orr v3.16b, v3.16b, v5.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v5.16b +; CHECK-SD-NEXT: orr v6.16b, v6.16b, v17.16b +; CHECK-SD-NEXT: bic v2.16b, v2.16b, v17.16b +; CHECK-SD-NEXT: ushr v16.2d, v3.2d, #4 +; CHECK-SD-NEXT: ushr v4.2d, v6.2d, #8 +; CHECK-SD-NEXT: orr v3.16b, v3.16b, v16.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v16.16b +; 
CHECK-SD-NEXT: orr v6.16b, v6.16b, v4.16b +; CHECK-SD-NEXT: bic v2.16b, v2.16b, v4.16b +; CHECK-SD-NEXT: ushr v1.2d, v3.2d, #8 +; CHECK-SD-NEXT: orr v3.16b, v3.16b, v1.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: ushr v5.2d, v3.2d, #16 +; CHECK-SD-NEXT: orr v1.16b, v3.16b, v5.16b +; CHECK-SD-NEXT: ushr v3.2d, v6.2d, #16 +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v5.16b +; CHECK-SD-NEXT: ushr v1.2d, v1.2d, #32 +; CHECK-SD-NEXT: orr v4.16b, v6.16b, v3.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v1.16b +; CHECK-SD-NEXT: bic v1.16b, v2.16b, v3.16b +; CHECK-SD-NEXT: ushr v2.2d, v4.2d, #32 ; CHECK-SD-NEXT: cnt v0.16b, v0.16b -; CHECK-SD-NEXT: mvn v1.16b, v1.16b +; CHECK-SD-NEXT: bic v1.16b, v1.16b, v2.16b ; CHECK-SD-NEXT: cnt v1.16b, v1.16b ; CHECK-SD-NEXT: uaddlp v0.8h, v0.16b ; CHECK-SD-NEXT: uaddlp v0.4s, v0.8h @@ -377,30 +392,40 @@ define <4 x i64> @v4i64(<4 x i64> %d) { ; CHECK-SD: // %bb.0: // %entry ; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #1 ; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #1 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #2 -; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #2 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #4 -; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #4 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #8 -; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #8 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #16 -; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #16 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b -; CHECK-SD-NEXT: ushr v2.2d, v0.2d, #32 -; CHECK-SD-NEXT: ushr v3.2d, v1.2d, #32 -; CHECK-SD-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-SD-NEXT: orr v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: orr v4.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: orr v5.16b, v1.16b, v3.16b ; CHECK-SD-NEXT: mvn v0.16b, v0.16b ; CHECK-SD-NEXT: mvn v1.16b, v1.16b +; CHECK-SD-NEXT: ushr v6.2d, v4.2d, #2 +; CHECK-SD-NEXT: ushr v7.2d, v5.2d, #2 +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: bic v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: orr v4.16b, v4.16b, v6.16b +; CHECK-SD-NEXT: orr v5.16b, v5.16b, v7.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v6.16b +; CHECK-SD-NEXT: bic v1.16b, v1.16b, v7.16b +; CHECK-SD-NEXT: ushr v16.2d, v4.2d, #4 +; CHECK-SD-NEXT: ushr v17.2d, v5.2d, #4 +; CHECK-SD-NEXT: orr v4.16b, v4.16b, v16.16b +; CHECK-SD-NEXT: orr v5.16b, v5.16b, v17.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v16.16b +; CHECK-SD-NEXT: bic v1.16b, v1.16b, v17.16b +; CHECK-SD-NEXT: ushr v2.2d, v4.2d, #8 +; CHECK-SD-NEXT: ushr v3.2d, v5.2d, #8 +; CHECK-SD-NEXT: orr v4.16b, v4.16b, v2.16b +; CHECK-SD-NEXT: orr v5.16b, v5.16b, v3.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: bic v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: ushr v6.2d, v4.2d, #16 +; CHECK-SD-NEXT: ushr v7.2d, v5.2d, #16 +; CHECK-SD-NEXT: orr v2.16b, v4.16b, v6.16b +; CHECK-SD-NEXT: orr v3.16b, v5.16b, v7.16b +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v6.16b +; CHECK-SD-NEXT: bic v1.16b, v1.16b, v7.16b +; CHECK-SD-NEXT: ushr v2.2d, v2.2d, #32 +; CHECK-SD-NEXT: ushr v3.2d, v3.2d, #32 +; CHECK-SD-NEXT: bic v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: bic v1.16b, v1.16b, v3.16b ; CHECK-SD-NEXT: cnt v0.16b, v0.16b ; CHECK-SD-NEXT: cnt v1.16b, v1.16b ; CHECK-SD-NEXT: uaddlp v0.8h, v0.16b diff --git 
a/llvm/test/CodeGen/AArch64/eon.ll b/llvm/test/CodeGen/AArch64/eon.ll index 8b31cbfe16b1a..ea0e0122d9b6d 100644 --- a/llvm/test/CodeGen/AArch64/eon.ll +++ b/llvm/test/CodeGen/AArch64/eon.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s ; RUN: llc %s -pass-remarks-missed=gisel* -mtriple=aarch64-none-linux-gnu -global-isel -o - 2>&1 | FileCheck %s @@ -6,8 +7,9 @@ ; Check that the eon instruction is generated instead of eor,movn define i64 @test1(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test1: -; CHECK: eon -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eon x0, x0, x1, lsl #4 +; CHECK-NEXT: ret entry: %shl = shl i64 %b, 4 %neg = xor i64 %a, -1 @@ -18,10 +20,11 @@ entry: ; Same check with multiple uses of %neg define i64 @test2(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test2: -; CHECK: eon -; CHECK: eon -; CHECK: lsl -; CHECK: ret +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: eon x8, x0, x1, lsl #4 +; CHECK-NEXT: eon x9, x2, x1, lsl #4 +; CHECK-NEXT: lsl x0, x8, x9 +; CHECK-NEXT: ret entry: %shl = shl i64 %b, 4 %neg = xor i64 %shl, -1 @@ -33,9 +36,6 @@ entry: ; Check that eon is generated if the xor is a disjoint or. define i64 @disjoint_or(i64 %a, i64 %b) { -; CHECK-LABEL: disjoint_or: -; CHECK: eon -; CHECK: ret %or = or disjoint i64 %a, %b %eon = xor i64 %or, -1 ret i64 %eon @@ -43,10 +43,6 @@ define i64 @disjoint_or(i64 %a, i64 %b) { ; Check that eon is *not* generated if the or is not disjoint. define i64 @normal_or(i64 %a, i64 %b) { -; CHECK-LABEL: normal_or: -; CHECK: orr -; CHECK: mvn -; CHECK: ret %or = or i64 %a, %b %not = xor i64 %or, -1 ret i64 %not diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll index 6233ce743b706..529b76cf84906 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -563,13 +563,13 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-LABEL: test_fcmp_ueq: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcmgt v2.4h, v0.4h, v1.4h -; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h -; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-FP16-NEXT: mvn v0.8b, v0.8b -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: test_fcmp_ueq: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcmgt v2.4h, v1.4h, v0.4h +; CHECK-FP16-SD-NEXT: fcmgt v0.4h, v0.4h, v1.4h +; CHECK-FP16-SD-NEXT: mvn v1.8b, v2.8b +; CHECK-FP16-SD-NEXT: bic v0.8b, v1.8b, v0.8b +; CHECK-FP16-SD-NEXT: ret ; ; CHECK-CVT-GI-LABEL: test_fcmp_ueq: ; CHECK-CVT-GI: // %bb.0: @@ -581,6 +581,14 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s ; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_fcmp_ueq: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcmgt v2.4h, v0.4h, v1.4h +; CHECK-FP16-GI-NEXT: fcmgt v0.4h, v1.4h, v0.4h +; CHECK-FP16-GI-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-FP16-GI-NEXT: mvn v0.8b, v0.8b +; CHECK-FP16-GI-NEXT: ret %1 = fcmp ueq <4 x half> %a, %b ret <4 x i1> %1 @@ -714,13 +722,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-LABEL: test_fcmp_uno: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcmge v2.4h, v0.4h, v1.4h -; CHECK-FP16-NEXT: 
fcmgt v0.4h, v1.4h, v0.4h -; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-FP16-NEXT: mvn v0.8b, v0.8b -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: test_fcmp_uno: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcmgt v2.4h, v1.4h, v0.4h +; CHECK-FP16-SD-NEXT: fcmge v0.4h, v0.4h, v1.4h +; CHECK-FP16-SD-NEXT: mvn v1.8b, v2.8b +; CHECK-FP16-SD-NEXT: bic v0.8b, v1.8b, v0.8b +; CHECK-FP16-SD-NEXT: ret ; ; CHECK-CVT-GI-LABEL: test_fcmp_uno: ; CHECK-CVT-GI: // %bb.0: @@ -732,6 +740,14 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s ; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_fcmp_uno: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcmge v2.4h, v0.4h, v1.4h +; CHECK-FP16-GI-NEXT: fcmgt v0.4h, v1.4h, v0.4h +; CHECK-FP16-GI-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-FP16-GI-NEXT: mvn v0.8b, v0.8b +; CHECK-FP16-GI-NEXT: ret %1 = fcmp uno <4 x half> %a, %b ret <4 x i1> %1 diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll index 86763eb5f9e3b..6d67fc9ebe1c6 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -990,14 +990,14 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-LABEL: test_fcmp_ueq: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcmgt v2.8h, v0.8h, v1.8h -; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h -; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-FP16-NEXT: mvn v0.16b, v0.16b -; CHECK-FP16-NEXT: xtn v0.8b, v0.8h -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: test_fcmp_ueq: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcmgt v2.8h, v1.8h, v0.8h +; CHECK-FP16-SD-NEXT: fcmgt v0.8h, v0.8h, v1.8h +; CHECK-FP16-SD-NEXT: mvn v1.16b, v2.16b +; CHECK-FP16-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-FP16-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-FP16-SD-NEXT: ret ; ; CHECK-CVT-GI-LABEL: test_fcmp_ueq: ; CHECK-CVT-GI: // %bb.0: @@ -1016,6 +1016,15 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_fcmp_ueq: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcmgt v2.8h, v0.8h, v1.8h +; CHECK-FP16-GI-NEXT: fcmgt v0.8h, v1.8h, v0.8h +; CHECK-FP16-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-FP16-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-FP16-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-FP16-GI-NEXT: ret %1 = fcmp ueq <8 x half> %a, %b ret <8 x i1> %1 } @@ -1190,14 +1199,14 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-LABEL: test_fcmp_uno: -; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: fcmge v2.8h, v0.8h, v1.8h -; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h -; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-FP16-NEXT: mvn v0.16b, v0.16b -; CHECK-FP16-NEXT: xtn v0.8b, v0.8h -; CHECK-FP16-NEXT: ret +; CHECK-FP16-SD-LABEL: test_fcmp_uno: +; CHECK-FP16-SD: // %bb.0: +; CHECK-FP16-SD-NEXT: fcmgt v2.8h, v1.8h, v0.8h +; CHECK-FP16-SD-NEXT: fcmge v0.8h, v0.8h, v1.8h +; CHECK-FP16-SD-NEXT: mvn v1.16b, v2.16b +; CHECK-FP16-SD-NEXT: bic v0.16b, v1.16b, v0.16b +; CHECK-FP16-SD-NEXT: xtn v0.8b, v0.8h +; CHECK-FP16-SD-NEXT: ret ; ; CHECK-CVT-GI-LABEL: test_fcmp_uno: ; CHECK-CVT-GI: // %bb.0: @@ -1216,6 +1225,15 @@ define 
<8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-GI-NEXT: ret +; +; CHECK-FP16-GI-LABEL: test_fcmp_uno: +; CHECK-FP16-GI: // %bb.0: +; CHECK-FP16-GI-NEXT: fcmge v2.8h, v0.8h, v1.8h +; CHECK-FP16-GI-NEXT: fcmgt v0.8h, v1.8h, v0.8h +; CHECK-FP16-GI-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-FP16-GI-NEXT: mvn v0.16b, v0.16b +; CHECK-FP16-GI-NEXT: xtn v0.8b, v0.8h +; CHECK-FP16-GI-NEXT: ret %1 = fcmp uno <8 x half> %a, %b ret <8 x i1> %1 } diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll index 6cfe66eb8e633..80293388a5cf9 100644 --- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll +++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll @@ -46,7 +46,9 @@ define @nbsl_i8( %a, %b) ; CHECK-LABEL: nbsl_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.b, #127 // =0x7f -; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: and z1.b, z1.b, #0x80 +; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d +; CHECK-NEXT: bic z0.d, z2.d, z1.d ; CHECK-NEXT: ret %1 = and %a, splat(i8 127) %2 = and %b, splat(i8 -128) @@ -59,7 +61,9 @@ define @nbsl_i16( %a, %b ; CHECK-LABEL: nbsl_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.h, #32767 // =0x7fff -; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: and z1.h, z1.h, #0x8000 +; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d +; CHECK-NEXT: bic z0.d, z2.d, z1.d ; CHECK-NEXT: ret %1 = and %a, splat(i16 32767) %2 = and %b, splat(i16 -32768) @@ -72,7 +76,9 @@ define @nbsl_i32( %a, %b ; CHECK-LABEL: nbsl_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.s, #0x7fffffff -; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: and z1.s, z1.s, #0x80000000 +; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d +; CHECK-NEXT: bic z0.d, z2.d, z1.d ; CHECK-NEXT: ret %1 = and %a, splat(i32 2147483647) %2 = and %b, splat(i32 -2147483648) @@ -85,7 +91,9 @@ define @nbsl_i64( %a, %b ; CHECK-LABEL: nbsl_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff -; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000 +; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d +; CHECK-NEXT: bic z0.d, z2.d, z1.d ; CHECK-NEXT: ret %1 = and %a, splat(i64 9223372036854775807) %2 = and %b, splat(i64 -9223372036854775808) @@ -115,7 +123,9 @@ define @codegen_bsl_i8( %0, @codegen_nbsl_i8( %0, %1, %2) { ; CHECK-LABEL: codegen_nbsl_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: bic z1.d, z1.d, z2.d +; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d +; CHECK-NEXT: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret %4 = and %2, %0 %5 = xor %2, splat (i8 -1) @@ -165,7 +175,9 @@ define @codegen_bsl_i16( %0, @codegen_nbsl_i16( %0, %1, %2) { ; CHECK-LABEL: codegen_nbsl_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: bic z1.d, z1.d, z2.d +; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d +; CHECK-NEXT: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret %4 = and %2, %0 %5 = xor %2, splat (i16 -1) @@ -215,7 +227,9 @@ define @codegen_bsl_i32( %0, @codegen_nbsl_i32( %0, %1, %2) { ; CHECK-LABEL: codegen_nbsl_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: bic z1.d, z1.d, z2.d +; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d +; CHECK-NEXT: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret %4 = and %2, %0 %5 = xor %2, splat (i32 -1) @@ -265,7 +279,9 @@ define @codegen_bsl_i64( %0, @codegen_nbsl_i64( %0, %1, %2) { ; CHECK-LABEL: codegen_nbsl_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; CHECK-NEXT: bic z1.d, z1.d, z2.d +; 
CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d +; CHECK-NEXT: bic z0.d, z0.d, z1.d ; CHECK-NEXT: ret %4 = and %2, %0 %5 = xor %2, splat (i64 -1) @@ -341,7 +357,9 @@ define @nand( %0, %1) #0 define @nor( %0, %1) #0 { ; CHECK-LABEL: nor: ; CHECK: // %bb.0: -; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z0.d +; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff +; CHECK-NEXT: eor z1.d, z1.d, z2.d +; CHECK-NEXT: bic z0.d, z1.d, z0.d ; CHECK-NEXT: ret %3 = or %1, %0 %4 = xor %3, splat (i64 -1) From 643e4d53a6884f33b960c1fca389422999611d01 Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Sat, 18 Oct 2025 06:56:14 +0000 Subject: [PATCH 14/20] [X86]: Updated tests --- llvm/test/CodeGen/X86/abds-vector-128.ll | 6 +- .../test/CodeGen/X86/avx512-mask-bit-manip.ll | 25 +- llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll | 174 ------------ llvm/test/CodeGen/X86/bool-ext-inc.ll | 7 +- llvm/test/CodeGen/X86/combine-or.ll | 39 ++- llvm/test/CodeGen/X86/combine-srl.ll | 6 +- .../CodeGen/X86/expand-vp-int-intrinsics.ll | 6 +- llvm/test/CodeGen/X86/icmp-abs-C-vec.ll | 105 +++---- llvm/test/CodeGen/X86/icmp-pow2-diff.ll | 54 ++-- llvm/test/CodeGen/X86/ispow2.ll | 24 +- llvm/test/CodeGen/X86/machine-cp.ll | 67 ++--- llvm/test/CodeGen/X86/mul-cmp.ll | 16 +- llvm/test/CodeGen/X86/promote-cmp.ll | 34 +-- llvm/test/CodeGen/X86/sat-add.ll | 10 +- llvm/test/CodeGen/X86/setcc-combine.ll | 6 +- llvm/test/CodeGen/X86/setcc-logic.ll | 7 +- .../CodeGen/X86/srem-seteq-vec-nonsplat.ll | 16 +- llvm/test/CodeGen/X86/sshl_sat_vec.ll | 6 +- ...-masked-merge-vector-variablemask-const.ll | 15 +- .../X86/urem-seteq-vec-tautological.ll | 12 +- llvm/test/CodeGen/X86/vec_cmp_sint-128.ll | 48 ++-- llvm/test/CodeGen/X86/vec_cmp_uint-128.ll | 48 ++-- llvm/test/CodeGen/X86/vec_compare.ll | 24 +- llvm/test/CodeGen/X86/vec_ctbits.ll | 12 +- llvm/test/CodeGen/X86/vec_setcc-2.ll | 13 +- llvm/test/CodeGen/X86/vector-lzcnt-128.ll | 96 +++---- llvm/test/CodeGen/X86/vector-lzcnt-512.ll | 266 +++++++++--------- llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll | 6 +- llvm/test/CodeGen/X86/vector-popcnt-128.ll | 10 +- llvm/test/CodeGen/X86/vector-unsigned-cmp.ll | 24 +- llvm/test/CodeGen/X86/vsplit-and.ll | 22 +- 31 files changed, 534 insertions(+), 670 deletions(-) delete mode 100644 llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll index 148be83892b72..bc57a31f063b5 100644 --- a/llvm/test/CodeGen/X86/abds-vector-128.ll +++ b/llvm/test/CodeGen/X86/abds-vector-128.ll @@ -756,9 +756,9 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] ; SSE2-NEXT: pand %xmm6, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: pandn %xmm2, %xmm0 ; SSE2-NEXT: paddq %xmm4, %xmm0 ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll index 3fcfb9d278da7..37df42ea2682d 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll @@ -714,18 +714,19 @@ define <64 x i8> @tzmsk_v64i8(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512F-NEXT: vpmovmskb %ymm4, %ecx ; AVX512F-NEXT: shlq $32, %rcx ; AVX512F-NEXT: leaq (%rax,%rcx), %rdx -; AVX512F-NEXT: addq %rcx, %rax -; AVX512F-NEXT: addq $-1, %rax -; AVX512F-NEXT: andnq %rax, %rdx, %rax 
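; NOTE: tzmsk computes (x - 1) & ~x over a 64-bit lane mask assembled from
; two 32-bit pmovmskb halves; the notq/andnq/andq sequence in the updated
; checks below is the De Morgan expansion of ~x into ~lo & ~hi.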
-; AVX512F-NEXT: movq %rax, %rcx
-; AVX512F-NEXT: movl %eax, %edx
-; AVX512F-NEXT: kmovw %eax, %k1
-; AVX512F-NEXT: shrq $32, %rax
-; AVX512F-NEXT: shrq $48, %rcx
-; AVX512F-NEXT: shrl $16, %edx
-; AVX512F-NEXT: kmovw %edx, %k2
-; AVX512F-NEXT: kmovw %ecx, %k3
-; AVX512F-NEXT: kmovw %eax, %k4
+; AVX512F-NEXT: addq $-1, %rdx
+; AVX512F-NEXT: notq %rcx
+; AVX512F-NEXT: andnq %rcx, %rax, %rax
+; AVX512F-NEXT: andq %rax, %rdx
+; AVX512F-NEXT: movq %rdx, %rax
+; AVX512F-NEXT: movl %edx, %ecx
+; AVX512F-NEXT: kmovw %edx, %k1
+; AVX512F-NEXT: shrq $32, %rdx
+; AVX512F-NEXT: shrq $48, %rax
+; AVX512F-NEXT: shrl $16, %ecx
+; AVX512F-NEXT: kmovw %ecx, %k2
+; AVX512F-NEXT: kmovw %eax, %k3
+; AVX512F-NEXT: kmovw %edx, %k4
 ; AVX512F-NEXT: vpaddb %ymm2, %ymm3, %ymm2
 ; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm1
 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1
diff --git a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll b/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
deleted file mode 100644
index 7f3a376b24b2a..0000000000000
--- a/llvm/test/CodeGen/X86/bmi-reassoc-demorgan.ll
+++ /dev/null
@@ -1,174 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X86-WITH-BMI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefix=X64-WITH-BMI
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-WITHOUT-BMI
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64-WITHOUT-BMI
-
-define i32 @reassoc_demorgan_i32(i32 %a, i32 %b) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_i32:
-; X86-WITH-BMI: # %bb.0:
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT: notl %ecx
-; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax
-; X86-WITH-BMI-NEXT: retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_i32:
-; X64-WITH-BMI: # %bb.0:
-; X64-WITH-BMI-NEXT: notl %edi
-; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax
-; X64-WITH-BMI-NEXT: retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
-; X86-WITHOUT-BMI: # %bb.0:
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: notl %eax
-; X86-WITHOUT-BMI-NEXT: retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i32:
-; X64-WITHOUT-BMI: # %bb.0:
-; X64-WITHOUT-BMI-NEXT: movl %edi, %eax
-; X64-WITHOUT-BMI-NEXT: orl %esi, %eax
-; X64-WITHOUT-BMI-NEXT: notl %eax
-; X64-WITHOUT-BMI-NEXT: retq
- %temp = or i32 %b, %a
- %res = xor i32 %temp, -1
- ret i32 %res
-}
-
-define i32 @reassoc_demorgan_three_arguments_i32(i32 %a, i32 %b, i32 %c) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X86-WITH-BMI: # %bb.0:
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITH-BMI-NEXT: notl %edx
-; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %ecx
-; X86-WITH-BMI-NEXT: andnl %ecx, %eax, %eax
-; X86-WITH-BMI-NEXT: retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X64-WITH-BMI: # %bb.0:
-; X64-WITH-BMI-NEXT: notl %edi
-; X64-WITH-BMI-NEXT: andnl %edi, %esi, %eax
-; X64-WITH-BMI-NEXT: andnl %eax, %edx, %eax
-; X64-WITH-BMI-NEXT: retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X86-WITHOUT-BMI: # %bb.0:
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: notl %eax
-; X86-WITHOUT-BMI-NEXT: retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i32:
-; X64-WITHOUT-BMI: # %bb.0:
-; X64-WITHOUT-BMI-NEXT: movl %edi, %eax
-; X64-WITHOUT-BMI-NEXT: orl %esi, %eax
-; X64-WITHOUT-BMI-NEXT: orl %edx, %eax
-; X64-WITHOUT-BMI-NEXT: notl %eax
-; X64-WITHOUT-BMI-NEXT: retq
- %and.demorgan = or i32 %b, %a
- %and3.demorgan = or i32 %and.demorgan, %c
- %and3 = xor i32 %and3.demorgan, -1
- ret i32 %and3
-}
-
-define i64 @reassoc_demorgan_i64(i64 %a, i64 %b) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_i64:
-; X86-WITH-BMI: # %bb.0:
-; X86-WITH-BMI-NEXT: pushl %esi
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-WITH-BMI-NEXT: notl %edx
-; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax
-; X86-WITH-BMI-NEXT: notl %esi
-; X86-WITH-BMI-NEXT: andnl %esi, %ecx, %edx
-; X86-WITH-BMI-NEXT: popl %esi
-; X86-WITH-BMI-NEXT: retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_i64:
-; X64-WITH-BMI: # %bb.0:
-; X64-WITH-BMI-NEXT: notq %rdi
-; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax
-; X64-WITH-BMI-NEXT: retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
-; X86-WITHOUT-BMI: # %bb.0:
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: notl %eax
-; X86-WITHOUT-BMI-NEXT: notl %edx
-; X86-WITHOUT-BMI-NEXT: retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_i64:
-; X64-WITHOUT-BMI: # %bb.0:
-; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax
-; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax
-; X64-WITHOUT-BMI-NEXT: notq %rax
-; X64-WITHOUT-BMI-NEXT: retq
- %temp = or i64 %b, %a
- %res = xor i64 %temp, -1
- ret i64 %res
-}
-
-define i64 @reassoc_demorgan_three_arguments_i64(i64 %a, i64 %b, i64 %c) nounwind {
-; X86-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X86-WITH-BMI: # %bb.0:
-; X86-WITH-BMI-NEXT: pushl %ebx
-; X86-WITH-BMI-NEXT: pushl %edi
-; X86-WITH-BMI-NEXT: pushl %esi
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-WITH-BMI-NEXT: movl {{[0-9]+}}(%esp), %ebx
-; X86-WITH-BMI-NEXT: notl %edi
-; X86-WITH-BMI-NEXT: andnl %edi, %edx, %edx
-; X86-WITH-BMI-NEXT: andnl %edx, %eax, %eax
-; X86-WITH-BMI-NEXT: notl %ebx
-; X86-WITH-BMI-NEXT: andnl %ebx, %esi, %edx
-; X86-WITH-BMI-NEXT: andnl %edx, %ecx, %edx
-; X86-WITH-BMI-NEXT: popl %esi
-; X86-WITH-BMI-NEXT: popl %edi
-; X86-WITH-BMI-NEXT: popl %ebx
-; X86-WITH-BMI-NEXT: retl
-;
-; X64-WITH-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X64-WITH-BMI: # %bb.0:
-; X64-WITH-BMI-NEXT: notq %rdi
-; X64-WITH-BMI-NEXT: andnq %rdi, %rsi, %rax
-; X64-WITH-BMI-NEXT: andnq %rax, %rdx, %rax
-; X64-WITH-BMI-NEXT: retq
-;
-; X86-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X86-WITHOUT-BMI: # %bb.0:
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %edx
-; X86-WITHOUT-BMI-NEXT: orl {{[0-9]+}}(%esp), %eax
-; X86-WITHOUT-BMI-NEXT: notl %eax
-; X86-WITHOUT-BMI-NEXT: notl %edx
-; X86-WITHOUT-BMI-NEXT: retl
-;
-; X64-WITHOUT-BMI-LABEL: reassoc_demorgan_three_arguments_i64:
-; X64-WITHOUT-BMI: # %bb.0:
-; X64-WITHOUT-BMI-NEXT: movq %rdi, %rax
-; X64-WITHOUT-BMI-NEXT: orq %rsi, %rax
-; X64-WITHOUT-BMI-NEXT: orq %rdx, %rax
-; X64-WITHOUT-BMI-NEXT: notq %rax
-; X64-WITHOUT-BMI-NEXT: retq
- %and.demorgan = or i64 %b, %a
- %and3.demorgan = or i64 %and.demorgan, %c
- %and3 = xor i64 %and3.demorgan, -1
- ret i64 %and3
-}
diff --git a/llvm/test/CodeGen/X86/bool-ext-inc.ll b/llvm/test/CodeGen/X86/bool-ext-inc.ll
index 088b0ce857f20..d89893f94bdae 100644
--- a/llvm/test/CodeGen/X86/bool-ext-inc.ll
+++ b/llvm/test/CodeGen/X86/bool-ext-inc.ll
@@ -88,8 +88,11 @@ define <4 x i32> @bool_logic_and_math_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32>
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpsrld $31, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; CHECK-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
 ; CHECK-NEXT: retq
 %cmp1 = icmp ne <4 x i32> %a, %b
 %cmp2 = icmp ne <4 x i32> %c, %d
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index 8c91274abf3dd..8d5bbb4ae8e1e 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -183,14 +183,32 @@ define i32 @or_and_multiuse_and_multiuse_i32(i32 %x, i32 %y) nounwind {
 }
 
 define i64 @or_build_pair_not(i32 %a0, i32 %a1) {
-; CHECK-LABEL: or_build_pair_not:
-; CHECK: # %bb.0:
-; CHECK-NEXT: # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT: shlq $32, %rsi
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: orq %rsi, %rax
-; CHECK-NEXT: notq %rax
-; CHECK-NEXT: retq
+; SSE-LABEL: or_build_pair_not:
+; SSE: # %bb.0:
+; SSE-NEXT: # kill: def $esi killed $esi def $rsi
+; SSE-NEXT: shlq $32, %rsi
+; SSE-NEXT: movl %edi, %eax
+; SSE-NEXT: orq %rsi, %rax
+; SSE-NEXT: notq %rax
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: or_build_pair_not:
+; AVX1: # %bb.0:
+; AVX1-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX1-NEXT: shlq $32, %rsi
+; AVX1-NEXT: movl %edi, %eax
+; AVX1-NEXT: orq %rsi, %rax
+; AVX1-NEXT: notq %rax
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: or_build_pair_not:
+; AVX2: # %bb.0:
+; AVX2-NEXT: # kill: def $esi killed $esi def $rsi
+; AVX2-NEXT: shlq $32, %rsi
+; AVX2-NEXT: notq %rsi
+; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: andnq %rsi, %rax, %rax
+; AVX2-NEXT: retq
 %n0 = xor i32 %a0, -1
 %n1 = xor i32 %a1, -1
 %x0 = zext i32 %n0 to i64
@@ -262,10 +280,9 @@ define i64 @PR89533(<64 x i8> %a0) {
 ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0
 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx
 ; AVX2-NEXT: shlq $32, %rcx
-; AVX2-NEXT: orq %rax, %rcx
 ; AVX2-NEXT: notq %rcx
-; AVX2-NEXT: xorl %eax, %eax
-; AVX2-NEXT: tzcntq %rcx, %rax
+; AVX2-NEXT: andnq %rcx, %rax, %rax
+; AVX2-NEXT: tzcntq %rax, %rax
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 %cmp = icmp ne <64 x i8> %a0,
diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll
index 7bc90534dcc6e..4e31177023b08 100644
--- a/llvm/test/CodeGen/X86/combine-srl.ll
+++ b/llvm/test/CodeGen/X86/combine-srl.ll
@@ -440,9 +440,9 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) {
 ; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
index dbfa69d497698..7919495821efd 100644
--- a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll
@@ -1490,9 +1490,9 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; SSE-NEXT: por %xmm1, %xmm0
 ; SSE-NEXT: movdqa %xmm0, %xmm1
 ; SSE-NEXT: psrld $16, %xmm1
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pandn %xmm2, %xmm0
 ; SSE-NEXT: movdqa %xmm0, %xmm1
 ; SSE-NEXT: psrlw $1, %xmm1
 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
index 23dcf334124c0..f59e53687ff74 100644
--- a/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
+++ b/llvm/test/CodeGen/X86/icmp-abs-C-vec.ll
@@ -640,8 +640,8 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
 ; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
 ; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
 ; AVX2-NEXT: vzeroupper
@@ -650,17 +650,18 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
 ; SSE41-LABEL: ne_and_to_abs_vec4x64:
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm3
 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT: pcmpeqq %xmm1, %xmm2
-; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
-; SSE41-NEXT: pmovsxwq {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT: pcmpeqq %xmm5, %xmm0
-; SSE41-NEXT: por %xmm3, %xmm0
-; SSE41-NEXT: pcmpeqq %xmm5, %xmm1
-; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: packssdw %xmm3, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: pmovsxwq {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
 ; SSE41-NEXT: packssdw %xmm1, %xmm0
-; SSE41-NEXT: pxor %xmm4, %xmm0
+; SSE41-NEXT: pxor %xmm3, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE2-LABEL: ne_and_to_abs_vec4x64:
@@ -681,8 +682,9 @@ define <4 x i1> @ne_and_to_abs_vec4x64(<4 x i64> %x) {
 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
 ; SSE2-NEXT: andps %xmm4, %xmm0
-; SSE2-NEXT: orps %xmm2, %xmm0
 ; SSE2-NEXT: xorps %xmm3, %xmm0
+; SSE2-NEXT: andnps %xmm0, %xmm2
+; SSE2-NEXT: movaps %xmm2, %xmm0
 ; SSE2-NEXT: retq
 %cmp1 = icmp ne <4 x i64> %x, <i64 129, i64 129, i64 129, i64 129>
 %cmp2 = icmp ne <4 x i64> %x, <i64 -129, i64 -129, i64 -129, i64 -129>
@@ -706,7 +708,51 @@ define <4 x i64> @ne_and_to_abs_vec4x64_sext(<4 x i64> %x) {
 ; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
 ; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm3 = [18446744073709551487,18446744073709551487,18446744073709551487,18446744073709551487]
 ; AVX2-NEXT: vpcmpeqq %ymm3, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
 ; AVX2-NEXT: retq
 ;
 ; SSE41-LABEL: ne_and_to_abs_vec4x64_sext:
 ; SSE41: # %bb.0:
 ; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = [129,129]
-; SSE41-NEXT: movdqa %xmm0, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm3
 ; SSE41-NEXT: pcmpeqq %xmm2, %xmm3
-; SSE41-NEXT: pcmpeqq %xmm1, %xmm2
-; SSE41-NEXT: pcmpeqd %xmm4, %xmm4
-; SSE41-NEXT: pmovsxwq {{.*#+}} xmm5 = [18446744073709551487,18446744073709551487]
-; SSE41-NEXT: pcmpeqq %xmm5, %xmm0
-; SSE41-NEXT: por %xmm3, %xmm0
-; SSE41-NEXT: pcmpeqq %xmm5, %xmm1
-; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm0, %xmm2
+; SSE41-NEXT: packssdw %xmm3, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE41-NEXT: pmovsxwq {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm1
+; SSE41-NEXT: pcmpeqq %xmm4, %xmm0
 ; SSE41-NEXT: packssdw %xmm1, %xmm0
-; SSE41-NEXT: pxor %xmm4, %xmm0
-; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
+; SSE41-NEXT: pxor %xmm3, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm2
+; SSE41-NEXT: pmovsxdq %xmm2, %xmm0
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,2,3,3]
 ; SSE41-NEXT: pslld $31, %xmm1
 ; SSE41-NEXT: psrad $31, %xmm1
-; SSE41-NEXT: movdqa %xmm2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE2-LABEL: ne_and_to_abs_vec4x64_sext:
 ; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [129,129]
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [129,129]
 ; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm2, %xmm3
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm2
-; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm3
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm4
 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm3[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2]
-; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm3[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm0
 ; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [18446744073709551487,18446744073709551487]
 ; SSE2-NEXT: pcmpeqd %xmm4, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm2
+; SSE2-NEXT: movdqa %xmm2, %xmm4
 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm1[1,3]
-; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
-; SSE2-NEXT: andps %xmm4, %xmm0
-; SSE2-NEXT: orps %xmm2, %xmm0
-; SSE2-NEXT: xorps %xmm3, %xmm0
+; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[0,2]
+; SSE2-NEXT: andps %xmm4, %xmm2
+; SSE2-NEXT: xorps %xmm3, %xmm2
+; SSE2-NEXT: andnps %xmm2, %xmm0
 ; SSE2-NEXT: xorps %xmm2, %xmm2
 ; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,2,3,3]
@@ -868,8 +870,9 @@ define <4 x i1> @ne_and_to_abs_vec4x32(<4 x i32> %x) {
 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
 ; SSE2-NEXT: retq
 %cmp1 = icmp ne <4 x i32> %x,
 %cmp2 = icmp ne <4 x i32> %x,
@@ -909,8 +912,9 @@ define <4 x i32> @ne_and_to_abs_vec4x32_sext(<4 x i32> %x) {
 ; SSE2-NEXT: pcmpeqd %xmm0, %xmm1
 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
 ; SSE2-NEXT: retq
 %cmp1 = icmp ne <4 x i32> %x,
 %cmp2 = icmp ne <4 x i32> %x,
@@ -1031,8 +1035,8 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
 ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vpmovsxbd %xmm0, %xmm0
 ; AVX2-NEXT: retq
 ;
@@ -1042,21 +1046,22 @@ define <4 x i1> @ne_and_to_abs_vec4x8(<4 x i8> %x) {
 ; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
 ; SSE41-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
 ; SSE41-NEXT: pxor %xmm2, %xmm0
-; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: pmovsxbd %xmm1, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE2-LABEL: ne_and_to_abs_vec4x8:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [88,88,88,88,u,u,u,u,u,u,u,u,u,u,u,u]
 ; SSE2-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
+; SSE2-NEXT: movdqa %xmm1, %xmm0
 ; SSE2-NEXT: retq
 %cmp1 = icmp ne <4 x i8> %x,
 %cmp2 = icmp ne <4 x i8> %x,
@@ -1087,8 +1092,8 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
 ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: retq
 ;
 ; SSE41-LABEL: ne_and_to_abs_vec4x16_sext:
@@ -1097,8 +1102,9 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
 ; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
 ; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
 ; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE2-LABEL: ne_and_to_abs_vec4x16_sext:
@@ -1107,8 +1113,9 @@ define <4 x i16> @ne_and_to_abs_vec4x16_sext(<4 x i16> %x) {
 ; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
 ; SSE2-NEXT: retq
 %cmp1 = icmp ne <4 x i16> %x,
 %cmp2 = icmp ne <4 x i16> %x,
diff --git a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
index dada1726be424..3fc2a323b5dc1 100644
--- a/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
+++ b/llvm/test/CodeGen/X86/icmp-pow2-diff.ll
@@ -151,7 +151,7 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
 ; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $54, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm2 & (xmm0 ^ xmm1)
 ; AVX512-NEXT: retq
 ;
 ; AVX2-LABEL: andnot_ne_v8i16_todo_no_splat:
@@ -159,18 +159,19 @@ define <8 x i1> @andnot_ne_v8i16_todo_no_splat(<8 x i16> %x) nounwind {
 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm2
 ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm2, %xmm0
 ; AVX2-NEXT: retq
 ;
 ; SSE-LABEL: andnot_ne_v8i16_todo_no_splat:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pcmpeqw %xmm1, %xmm2
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE-NEXT: movdqa %xmm0, %xmm1
+; SSE-NEXT: pcmpeqw %xmm2, %xmm1
 ; SSE-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pandn %xmm0, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
 ; SSE-NEXT: retq
 %cmp1 = icmp ne <8 x i16> %x,
 %cmp2 = icmp ne <8 x i16> %x,
@@ -184,7 +185,7 @@ define <8 x i1> @andnot_ne_v8i16(<8 x i16> %x) nounwind {
 ; AVX512-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm0
 ; AVX512-NEXT: retq
 ;
 ; AVX2-LABEL: andnot_ne_v8i16:
@@ -215,28 +216,29 @@ define <16 x i1> @andnot_ne_v16i8_fail_max_not_n1(<16 x i8> %x) nounwind {
 ; AVX512-LABEL: andnot_ne_v16i8_fail_max_not_n1:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
-; AVX512-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $54, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpbroadcastd {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX512-NEXT: vpandn %xmm0, %xmm1, %xmm0
 ; AVX512-NEXT: retq
 ;
 ; AVX2-LABEL: andnot_ne_v16i8_fail_max_not_n1:
 ; AVX2: # %bb.0:
 ; AVX2-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm2
-; AVX2-NEXT: vpcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm2, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpbroadcastb {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX2-NEXT: vpcmpgtb %xmm0, %xmm2, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: retq
 ;
 ; SSE-LABEL: andnot_ne_v16i8_fail_max_not_n1:
 ; SSE: # %bb.0:
 ; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: movdqa %xmm0, %xmm2
-; SSE-NEXT: pcmpeqb %xmm1, %xmm2
-; SSE-NEXT: pcmpgtb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: por %xmm2, %xmm0
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT: pcmpgtb %xmm0, %xmm2
+; SSE-NEXT: pandn %xmm2, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm0
 ; SSE-NEXT: retq
 %cmp1 = icmp ne <16 x i8> %x,
 %cmp2 = icmp ne <16 x i8> %x,
@@ -250,7 +252,7 @@ define <16 x i1> @andnot_ne_v16i8(<16 x i8> %x) nounwind {
 ; AVX512-NEXT: vpandnd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
 ; AVX512-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm0
 ; AVX512-NEXT: retq
 ;
 ; AVX2-LABEL: andnot_ne_v16i8:
@@ -309,7 +311,7 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
 ; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX512-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX512-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $86, %xmm2, %xmm1, %xmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} xmm0 = ~xmm1 & (xmm0 ^ xmm2)
 ; AVX512-NEXT: retq
 ;
 ; AVX2-LABEL: addand_ne_v8i16_fail:
@@ -317,8 +319,8 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
 ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
 ; AVX2-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
 ; AVX2-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
 ; AVX2-NEXT: retq
 ;
 ; SSE41-LABEL: addand_ne_v8i16_fail:
@@ -327,8 +329,9 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
 ; SSE41-NEXT: pcmpeqw %xmm0, %xmm1
 ; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
 ; SSE41-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE41-NEXT: por %xmm1, %xmm0
 ; SSE41-NEXT: pxor %xmm2, %xmm0
+; SSE41-NEXT: pandn %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE2-LABEL: addand_ne_v8i16_fail:
@@ -337,8 +340,9 @@ define <8 x i1> @addand_ne_v8i16_fail(<8 x i16> %x) nounwind {
 ; SSE2-NEXT: pcmpeqw %xmm0, %xmm1
 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
 ; SSE2-NEXT: pcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pandn %xmm0, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
 ; SSE2-NEXT: retq
 %cmp1 = icmp ne <8 x i16> %x,
 %cmp2 = icmp ne <8 x i16> %x,
diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll
index badfd1af940ca..478d80e9827a5 100644
--- a/llvm/test/CodeGen/X86/ispow2.ll
+++ b/llvm/test/CodeGen/X86/ispow2.ll
@@ -179,19 +179,23 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) {
 ; CHECK-NOBMI-NEXT: pxor %xmm4, %xmm1
 ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm6
 ; CHECK-NOBMI-NEXT: pcmpgtd %xmm4, %xmm6
+; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; CHECK-NOBMI-NEXT: pand %xmm6, %xmm1
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3]
+; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm4
+; CHECK-NOBMI-NEXT: pandn %xmm4, %xmm1
 ; CHECK-NOBMI-NEXT: pxor %xmm5, %xmm3
 ; CHECK-NOBMI-NEXT: pxor %xmm3, %xmm0
-; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm5
-; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm5
-; CHECK-NOBMI-NEXT: movdqa %xmm5, %xmm7
-; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,2],xmm6[0,2]
-; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1
+; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm4
+; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm4
 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm3, %xmm0
-; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3]
-; CHECK-NOBMI-NEXT: andps %xmm7, %xmm0
-; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,3],xmm6[1,3]
-; CHECK-NOBMI-NEXT: orps %xmm5, %xmm0
-; CHECK-NOBMI-NEXT: xorps %xmm2, %xmm0
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; CHECK-NOBMI-NEXT: pand %xmm4, %xmm0
+; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm3
+; CHECK-NOBMI-NEXT: pandn %xmm3, %xmm0
+; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
 ; CHECK-NOBMI-NEXT: retq
 ;
 ; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z:
diff --git a/llvm/test/CodeGen/X86/machine-cp.ll b/llvm/test/CodeGen/X86/machine-cp.ll
index c84a1159ad56a..0713f0bbe244c 100644
--- a/llvm/test/CodeGen/X86/machine-cp.ll
+++ b/llvm/test/CodeGen/X86/machine-cp.ll
@@ -100,55 +100,38 @@ define <16 x float> @foo(<16 x float> %x) {
 ; CHECK-LABEL: foo:
 ; CHECK: ## %bb.0: ## %bb
 ; CHECK-NEXT: xorps %xmm5, %xmm5
-; CHECK-NEXT: cvttps2dq %xmm3, %xmm8
+; CHECK-NEXT: cvttps2dq %xmm3, %xmm6
 ; CHECK-NEXT: movaps %xmm3, %xmm4
 ; CHECK-NEXT: cmpltps %xmm5, %xmm4
-; CHECK-NEXT: movaps {{.*#+}} xmm7 = [13,14,15,16]
-; CHECK-NEXT: movaps %xmm4, %xmm6
-; CHECK-NEXT: orps %xmm7, %xmm6
-; CHECK-NEXT: cvtdq2ps %xmm8, %xmm3
-; CHECK-NEXT: andps %xmm7, %xmm3
-; CHECK-NEXT: andps %xmm6, %xmm3
-; CHECK-NEXT: andnps %xmm4, %xmm6
-; CHECK-NEXT: cvttps2dq %xmm2, %xmm4
+; CHECK-NEXT: cvttps2dq %xmm2, %xmm3
 ; CHECK-NEXT: movaps %xmm2, %xmm7
 ; CHECK-NEXT: cmpltps %xmm5, %xmm7
-; CHECK-NEXT: movaps {{.*#+}} xmm8 = [9,10,11,12]
-; CHECK-NEXT: movaps %xmm7, %xmm9
-; CHECK-NEXT: orps %xmm8, %xmm9
-; CHECK-NEXT: cvtdq2ps %xmm4, %xmm2
-; CHECK-NEXT: andps %xmm8, %xmm2
-; CHECK-NEXT: andps %xmm9, %xmm2
-; CHECK-NEXT: andnps %xmm7, %xmm9
-; CHECK-NEXT: cvttps2dq %xmm1, %xmm4
-; CHECK-NEXT: cmpltps %xmm5, %xmm1
-; CHECK-NEXT: movaps {{.*#+}} xmm7 = [5,6,7,8]
+; CHECK-NEXT: cvttps2dq %xmm1, %xmm2
 ; CHECK-NEXT: movaps %xmm1, %xmm8
-; CHECK-NEXT: orps %xmm7, %xmm8
-; CHECK-NEXT: cvtdq2ps %xmm4, %xmm4
-; CHECK-NEXT: andps %xmm7, %xmm4
-; CHECK-NEXT: andps %xmm8, %xmm4
-; CHECK-NEXT: andnps %xmm1, %xmm8
+; CHECK-NEXT: cmpltps %xmm5, %xmm8
 ; CHECK-NEXT: cvttps2dq %xmm0, %xmm1
-; CHECK-NEXT: cmpltps %xmm5, %xmm0
+; CHECK-NEXT: movaps %xmm0, %xmm9
+; CHECK-NEXT: cmpltps %xmm5, %xmm9
 ; CHECK-NEXT: movaps {{.*#+}} xmm5 = [1,2,3,4]
-; CHECK-NEXT: movaps %xmm0, %xmm7
-; CHECK-NEXT: orps %xmm5, %xmm7
-; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
-; CHECK-NEXT: andps %xmm5, %xmm1
-; CHECK-NEXT: andps %xmm7, %xmm1
-; CHECK-NEXT: andnps %xmm0, %xmm7
-; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
-; CHECK-NEXT: andps %xmm0, %xmm7
-; CHECK-NEXT: orps %xmm7, %xmm1
-; CHECK-NEXT: andps %xmm0, %xmm8
-; CHECK-NEXT: orps %xmm8, %xmm4
-; CHECK-NEXT: andps %xmm0, %xmm9
-; CHECK-NEXT: orps %xmm9, %xmm2
-; CHECK-NEXT: andps %xmm0, %xmm6
-; CHECK-NEXT: orps %xmm6, %xmm3
-; CHECK-NEXT: movaps %xmm1, %xmm0
-; CHECK-NEXT: movaps %xmm4, %xmm1
+; CHECK-NEXT: orps %xmm5, %xmm9
+; CHECK-NEXT: movaps {{.*#+}} xmm10 = [5,6,7,8]
+; CHECK-NEXT: orps %xmm10, %xmm8
+; CHECK-NEXT: movaps {{.*#+}} xmm11 = [9,10,11,12]
+; CHECK-NEXT: orps %xmm11, %xmm7
+; CHECK-NEXT: movaps {{.*#+}} xmm12 = [13,14,15,16]
+; CHECK-NEXT: orps %xmm12, %xmm4
+; CHECK-NEXT: cvtdq2ps %xmm1, %xmm0
+; CHECK-NEXT: cvtdq2ps %xmm2, %xmm1
+; CHECK-NEXT: cvtdq2ps %xmm3, %xmm2
+; CHECK-NEXT: cvtdq2ps %xmm6, %xmm3
+; CHECK-NEXT: andps %xmm5, %xmm0
+; CHECK-NEXT: andps %xmm9, %xmm0
+; CHECK-NEXT: andps %xmm10, %xmm1
+; CHECK-NEXT: andps %xmm8, %xmm1
+; CHECK-NEXT: andps %xmm11, %xmm2
+; CHECK-NEXT: andps %xmm7, %xmm2
+; CHECK-NEXT: andps %xmm12, %xmm3
+; CHECK-NEXT: andps %xmm4, %xmm3
 ; CHECK-NEXT: retq
 bb:
 %v3 = icmp slt <16 x i32> , zeroinitializer
diff --git a/llvm/test/CodeGen/X86/mul-cmp.ll b/llvm/test/CodeGen/X86/mul-cmp.ll
index 0ee4601acf694..4fffb42bdc672 100644
--- a/llvm/test/CodeGen/X86/mul-cmp.ll
+++ b/llvm/test/CodeGen/X86/mul-cmp.ll
@@ -119,21 +119,21 @@ define <4 x i1> @mul_nsw_ne0_v4i32(<4 x i32> %x, <4 x i32> %y) {
 ; SSE-LABEL: mul_nsw_ne0_v4i32:
 ; SSE: # %bb.0:
 ; SSE-NEXT: pxor %xmm2, %xmm2
-; SSE-NEXT: pcmpeqd %xmm2, %xmm1
 ; SSE-NEXT: pcmpeqd %xmm2, %xmm0
-; SSE-NEXT: por %xmm1, %xmm0
-; SSE-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: pcmpeqd %xmm2, %xmm1
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pandn %xmm2, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: mul_nsw_ne0_v4i32:
 ; AVX: # %bb.0:
 ; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
 ; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
-; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpandn %xmm1, %xmm0, %xmm0
 ; AVX-NEXT: retq
 %m = mul nsw <4 x i32> %x, %y
 %r = icmp ne <4 x i32> %m, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll
index 88934a382bbfa..aeb8fe93930a0 100644
--- a/llvm/test/CodeGen/X86/promote-cmp.ll
+++ b/llvm/test/CodeGen/X86/promote-cmp.ll
@@ -8,34 +8,36 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
 ; SSE2-LABEL: PR45808:
 ; SSE2: # %bb.0:
 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm3, %xmm5
-; SSE2-NEXT: pxor %xmm4, %xmm5
-; SSE2-NEXT: movdqa %xmm1, %xmm6
+; SSE2-NEXT: movdqa %xmm3, %xmm6
 ; SSE2-NEXT: pxor %xmm4, %xmm6
-; SSE2-NEXT: movdqa %xmm6, %xmm7
-; SSE2-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2-NEXT: movdqa %xmm1, %xmm7
+; SSE2-NEXT: pxor %xmm4, %xmm7
+; SSE2-NEXT: movdqa %xmm7, %xmm5
+; SSE2-NEXT: pcmpgtd %xmm6, %xmm5
 ; SSE2-NEXT: movdqa %xmm2, %xmm8
 ; SSE2-NEXT: pxor %xmm4, %xmm8
 ; SSE2-NEXT: pxor %xmm0, %xmm4
 ; SSE2-NEXT: movdqa %xmm4, %xmm9
 ; SSE2-NEXT: pcmpgtd %xmm8, %xmm9
 ; SSE2-NEXT: movdqa %xmm9, %xmm10
-; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm7[0,2]
-; SSE2-NEXT: pcmpeqd %xmm5, %xmm6
+; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm5[0,2]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm7
 ; SSE2-NEXT: pcmpeqd %xmm8, %xmm4
-; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm6[1,3]
+; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm7[1,3]
 ; SSE2-NEXT: andps %xmm10, %xmm4
-; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[1,3],xmm7[1,3]
-; SSE2-NEXT: orps %xmm4, %xmm9
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
-; SSE2-NEXT: pxor %xmm9, %xmm4
-; SSE2-NEXT: pxor %xmm5, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
-; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
+; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[1,3],xmm5[1,3]
+; SSE2-NEXT: pcmpeqd %xmm6, %xmm6
+; SSE2-NEXT: pxor %xmm9, %xmm6
+; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,2,3,3]
+; SSE2-NEXT: pandn %xmm6, %xmm4
+; SSE2-NEXT: pxor %xmm6, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1]
 ; SSE2-NEXT: pand %xmm4, %xmm0
 ; SSE2-NEXT: pandn %xmm2, %xmm4
 ; SSE2-NEXT: por %xmm4, %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm9[2,2,3,3]
+; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3]
+; SSE2-NEXT: por %xmm7, %xmm2
 ; SSE2-NEXT: pslld $31, %xmm2
 ; SSE2-NEXT: psrad $31, %xmm2
 ; SSE2-NEXT: pand %xmm2, %xmm1
diff --git a/llvm/test/CodeGen/X86/sat-add.ll b/llvm/test/CodeGen/X86/sat-add.ll
index b12be7cb129d3..37bc8ded142c1 100644
--- a/llvm/test/CodeGen/X86/sat-add.ll
+++ b/llvm/test/CodeGen/X86/sat-add.ll
@@ -1004,9 +1004,10 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32>
 ; SSE2-NEXT: pxor %xmm1, %xmm4
 ; SSE2-NEXT: pcmpgtd %xmm3, %xmm4
 ; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm4
 ; SSE2-NEXT: pxor %xmm2, %xmm4
-; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm4, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
 ; SSE2-NEXT: paddd %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
@@ -1147,9 +1148,10 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64>
 ; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
 ; SSE2-NEXT: por %xmm3, %xmm4
 ; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: por %xmm1, %xmm4
 ; SSE2-NEXT: pxor %xmm2, %xmm4
-; SSE2-NEXT: por %xmm4, %xmm0
+; SSE2-NEXT: movdqa %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm4, %xmm2
+; SSE2-NEXT: por %xmm2, %xmm0
 ; SSE2-NEXT: paddq %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll
index f526db00df606..d97e603c636af 100644
--- a/llvm/test/CodeGen/X86/setcc-combine.ll
+++ b/llvm/test/CodeGen/X86/setcc-combine.ll
@@ -1020,9 +1020,9 @@ define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) {
 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT: pand %xmm3, %xmm0
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
 ; CHECK-NEXT: retq
 %na = xor <2 x i64> %a, <i64 -1, i64 -1>
 %nb = xor <2 x i64> %b, <i64 -1, i64 -1>
diff --git a/llvm/test/CodeGen/X86/setcc-logic.ll b/llvm/test/CodeGen/X86/setcc-logic.ll
index c98aae7fbf405..4b1225c7ac1d8 100644
--- a/llvm/test/CodeGen/X86/setcc-logic.ll
+++ b/llvm/test/CodeGen/X86/setcc-logic.ll
@@ -541,9 +541,10 @@ define <4 x i32> @and_icmps_const_1bit_diff_vec(<4 x i32> %x) {
 ; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [44,60,44,60]
 ; CHECK-NEXT: pcmpeqd %xmm0, %xmm1
 ; CHECK-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm0, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm1
+; CHECK-NEXT: movdqa %xmm1, %xmm0
 ; CHECK-NEXT: retq
 %a = icmp ne <4 x i32> %x,
 %b = icmp ne <4 x i32> %x,
diff --git a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
index 2d0778853fecd..aad6abfa78c23 100644
--- a/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/X86/srem-seteq-vec-nonsplat.ll
@@ -2401,16 +2401,16 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
 ; CHECK-AVX1-NEXT: vpor %xmm5, %xmm3, %xmm3
 ; CHECK-AVX1-NEXT: vpsubb %xmm3, %xmm0, %xmm0
 ; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0
-; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm3
+; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
+; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm0
+; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm4, %xmm4
+; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm4, %xmm3
 ; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0
 ; CHECK-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
 ; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm3
 ; CHECK-AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1
 ; CHECK-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; CHECK-AVX1-NEXT: vorps %ymm0, %ymm1, %ymm0
-; CHECK-AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
-; CHECK-AVX1-NEXT: vcmptrueps %ymm1, %ymm1, %ymm1
-; CHECK-AVX1-NEXT: vxorps %ymm1, %ymm0, %ymm0
+; CHECK-AVX1-NEXT: vandnps %ymm0, %ymm1, %ymm0
 ; CHECK-AVX1-NEXT: retq
 ;
 ; CHECK-AVX2-LABEL: pr51133:
@@ -2450,10 +2450,10 @@ define <32 x i1> @pr51133(<32 x i8> %x, <32 x i8> %y) {
 ; CHECK-AVX2-NEXT: vpor %ymm3, %ymm4, %ymm3
 ; CHECK-AVX2-NEXT: vpsubb %ymm3, %ymm0, %ymm0
 ; CHECK-AVX2-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; CHECK-AVX2-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
+; CHECK-AVX2-NEXT: vpxor %ymm3, %ymm0, %ymm0
 ; CHECK-AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
-; CHECK-AVX2-NEXT: vpor %ymm0, %ymm1, %ymm0
-; CHECK-AVX2-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; CHECK-AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; CHECK-AVX2-NEXT: vpandn %ymm0, %ymm1, %ymm0
 ; CHECK-AVX2-NEXT: retq
 ;
 ; CHECK-AVX512VL-LABEL: pr51133:
diff --git a/llvm/test/CodeGen/X86/sshl_sat_vec.ll b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
index 10dee14bdd1a0..82c157c207375 100644
--- a/llvm/test/CodeGen/X86/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/sshl_sat_vec.ll
@@ -37,9 +37,9 @@ define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; X64-NEXT: pand %xmm2, %xmm0
 ; X64-NEXT: pxor %xmm5, %xmm5
 ; X64-NEXT: pcmpgtd %xmm4, %xmm5
-; X64-NEXT: por %xmm2, %xmm5
-; X64-NEXT: pcmpeqd %xmm2, %xmm2
-; X64-NEXT: pxor %xmm5, %xmm2
+; X64-NEXT: pcmpeqd %xmm4, %xmm4
+; X64-NEXT: pxor %xmm5, %xmm4
+; X64-NEXT: pandn %xmm4, %xmm2
 ; X64-NEXT: por %xmm0, %xmm2
 ; X64-NEXT: pandn %xmm2, %xmm1
 ; X64-NEXT: por %xmm3, %xmm1
diff --git a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
index 58fd6492f2ed5..00d122838dbc5 100644
--- a/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
+++ b/llvm/test/CodeGen/X86/unfold-masked-merge-vector-variablemask-const.ll
@@ -127,14 +127,21 @@ define <4 x i32> @in_constant_varx_mone_invmask(ptr%px, ptr%py, ptr%pmask) {
 ;
 ; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
 ; CHECK-SSE2: # %bb.0:
-; CHECK-SSE2-NEXT: movaps (%rdi), %xmm0
-; CHECK-SSE2-NEXT: orps (%rdx), %xmm0
+; CHECK-SSE2-NEXT: movdqa (%rdi), %xmm0
+; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; CHECK-SSE2-NEXT: movdqa (%rdx), %xmm2
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm2
+; CHECK-SSE2-NEXT: pandn %xmm2, %xmm0
+; CHECK-SSE2-NEXT: pxor %xmm1, %xmm0
 ; CHECK-SSE2-NEXT: retq
 ;
 ; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
 ; CHECK-XOP: # %bb.0:
-; CHECK-XOP-NEXT: vmovaps (%rdi), %xmm0
-; CHECK-XOP-NEXT: vorps (%rdx), %xmm0, %xmm0
+; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
+; CHECK-XOP-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-XOP-NEXT: vpxor (%rdx), %xmm1, %xmm2
+; CHECK-XOP-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; CHECK-XOP-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; CHECK-XOP-NEXT: retq
 %x = load <4 x i32>, ptr%px, align 16
 %y = load <4 x i32>, ptr%py, align 16
diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
index 84856aab85079..6e68b37bec98a 100644
--- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll
@@ -198,9 +198,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
 ; CHECK-SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm1
 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE2-NEXT: por %xmm1, %xmm0
-; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1
+; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-SSE2-NEXT: pxor %xmm0, %xmm2
+; CHECK-SSE2-NEXT: pandn %xmm2, %xmm1
 ; CHECK-SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
 ; CHECK-SSE2-NEXT: retq
 ;
@@ -223,9 +223,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind {
 ; CHECK-SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
 ; CHECK-SSE41-NEXT: pand %xmm2, %xmm1
 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-SSE41-NEXT: por %xmm1, %xmm0
-; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-SSE41-NEXT: pxor %xmm0, %xmm1
+; CHECK-SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-SSE41-NEXT: pxor %xmm0, %xmm2
+; CHECK-SSE41-NEXT: pandn %xmm2, %xmm1
 ; CHECK-SSE41-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero
 ; CHECK-SSE41-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
index ac4b25be5eb65..25ba593d47062 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll
@@ -155,7 +155,7 @@ define <2 x i64> @ne_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-LABEL: ne_v2i64:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -194,7 +194,7 @@ define <4 x i32> @ne_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512-LABEL: ne_v4i32:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -233,7 +233,7 @@ define <8 x i16> @ne_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512-LABEL: ne_v8i16:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -272,7 +272,7 @@ define <16 x i8> @ne_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512-LABEL: ne_v16i8:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE2-NEXT: pand %xmm3, %xmm0
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE41-NEXT: pand %xmm3, %xmm0
 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE42-LABEL: ge_v2i64:
@@ -349,7 +349,7 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-LABEL: ge_v2i64:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -388,7 +388,7 @@ define <4 x i32> @ge_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512-LABEL: ge_v4i32:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -427,7 +427,7 @@ define <8 x i16> @ge_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512-LABEL: ge_v8i16:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -466,7 +466,7 @@ define <16 x i8> @ge_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512-LABEL: ge_v16i8:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -606,9 +606,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT: pand %xmm3, %xmm0
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: le_v2i64:
@@ -623,9 +623,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT: pand %xmm3, %xmm0
 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE42-LABEL: le_v2i64:
@@ -657,7 +657,7 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-LABEL: le_v2i64:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -696,7 +696,7 @@ define <4 x i32> @le_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512-LABEL: le_v4i32:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -735,7 +735,7 @@ define <8 x i16> @le_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512-LABEL: le_v8i16:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -774,7 +774,7 @@ define <16 x i8> @le_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512-LABEL: le_v16i8:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
index 9a0756edbce32..bd730e7dbefbc 100644
--- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
+++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll
@@ -155,7 +155,7 @@ define <2 x i64> @ne_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-LABEL: ne_v2i64:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -194,7 +194,7 @@ define <4 x i32> @ne_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512-LABEL: ne_v4i32:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -233,7 +233,7 @@ define <8 x i16> @ne_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512-LABEL: ne_v8i16:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -272,7 +272,7 @@ define <16 x i8> @ne_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512-LABEL: ne_v16i8:
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE2-NEXT: pand %xmm3, %xmm0
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: ge_v2i64:
@@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; SSE41-NEXT: pand %xmm3, %xmm0
 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE42-LABEL: ge_v2i64:
@@ -535,7 +535,7 @@ define <2 x i64> @gt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm1
 ; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -594,7 +594,7 @@ define <4 x i32> @gt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -653,7 +653,7 @@ define <8 x i16> @gt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -696,7 +696,7 @@ define <16 x i8> @gt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -722,9 +722,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE2-NEXT: pand %xmm3, %xmm0
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE2-NEXT: por %xmm0, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: le_v2i64:
@@ -739,9 +739,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; SSE41-NEXT: pand %xmm3, %xmm0
 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE41-NEXT: pxor %xmm1, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE41-NEXT: pxor %xmm1, %xmm2
+; SSE41-NEXT: pandn %xmm2, %xmm0
 ; SSE41-NEXT: retq
 ;
 ; SSE42-LABEL: le_v2i64:
@@ -960,7 +960,7 @@ define <2 x i64> @lt_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind {
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
 ; AVX512-NEXT: vpmaxuq %zmm1, %zmm0, %zmm1
 ; AVX512-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -1020,7 +1020,7 @@ define <4 x i32> @lt_v4i32(<4 x i32> %a, <4 x i32> %b) nounwind {
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpmaxud %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -1080,7 +1080,7 @@ define <8 x i16> @lt_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind {
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
@@ -1123,7 +1123,7 @@ define <16 x i8> @lt_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind {
 ; AVX512: # %bb.0:
 ; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; AVX512-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0
 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512-NEXT: vzeroupper
 ; AVX512-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/vec_compare.ll b/llvm/test/CodeGen/X86/vec_compare.ll
index c1045c7b72f2c..0fc298a2b4cd4 100644
--- a/llvm/test/CodeGen/X86/vec_compare.ll
+++ b/llvm/test/CodeGen/X86/vec_compare.ll
@@ -128,9 +128,9 @@ define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; CHECK-NEXT: pand %xmm3, %xmm0
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
 ; CHECK-NEXT: retl
 %C = icmp sge <2 x i64> %A, %B
 %D = sext <2 x i1> %C to <2 x i64>
@@ -150,9 +150,9 @@ define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT: pand %xmm3, %xmm0
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
 ; CHECK-NEXT: retl
 %C = icmp sle <2 x i64> %A, %B
 %D = sext <2 x i1> %C to <2 x i64>
@@ -212,9 +212,9 @@ define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; CHECK-NEXT: pand %xmm3, %xmm0
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
 ; CHECK-NEXT: retl
 %C = icmp uge <2 x i64> %A, %B
 %D = sext <2 x i1> %C to <2 x i64>
@@ -234,9 +234,9 @@ define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind {
 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT: pand %xmm3, %xmm0
 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT: por %xmm0, %xmm1
-; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
 ; CHECK-NEXT: retl
 %C = icmp ule <2 x i64> %A, %B
 %D = sext <2 x i1> %C to <2 x i64>
diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll
index 370f88d644b57..048117dd43e66 100644
--- a/llvm/test/CodeGen/X86/vec_ctbits.ll
+++ b/llvm/test/CodeGen/X86/vec_ctbits.ll
@@ -52,9 +52,9 @@ define <2 x i64> @foolz(<2 x i64> %a) nounwind {
 ; CHECK-NEXT: por %xmm1, %xmm0
 ; CHECK-NEXT: movdqa %xmm0, %xmm1
 ; CHECK-NEXT: psrlq $32, %xmm1
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
 ; CHECK-NEXT: movdqa %xmm0, %xmm1
 ; CHECK-NEXT: psrlw $1, %xmm1
 ; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -154,9 +154,9 @@ define <2 x i32> @promlz(<2 x i32> %a) nounwind {
 ; CHECK-NEXT: por %xmm1, %xmm0
 ; CHECK-NEXT: movdqa %xmm0, %xmm1
 ; CHECK-NEXT: psrld $16, %xmm1
-; CHECK-NEXT: por %xmm1, %xmm0
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm1, %xmm0
+; CHECK-NEXT: pcmpeqd %xmm2, %xmm2
+; CHECK-NEXT: pxor %xmm1, %xmm2
+; CHECK-NEXT: pandn %xmm2, %xmm0
 ; CHECK-NEXT: movdqa %xmm0, %xmm1
 ; CHECK-NEXT: psrlw $1, %xmm1
 ; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
diff --git a/llvm/test/CodeGen/X86/vec_setcc-2.ll b/llvm/test/CodeGen/X86/vec_setcc-2.ll
index 5a71878ea4579..ade6b5c8d6bdf 100644
--- a/llvm/test/CodeGen/X86/vec_setcc-2.ll
+++ b/llvm/test/CodeGen/X86/vec_setcc-2.ll
@@ -448,13 +448,14 @@ define <2 x i1> @ule_v2i64_splat(<2 x i64> %x) {
 ; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2]
-; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm1
 ; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
-; SSE2-NEXT: por %xmm1, %xmm2
-; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm2, %xmm3
+; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
+; SSE2-NEXT: pand %xmm0, %xmm1
+; SSE2-NEXT: pandn %xmm3, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm0
 ; SSE2-NEXT: retq
 ;
 ; SSE41-LABEL: ule_v2i64_splat:
diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
index cfb5fac2fd7aa..990113b1ecc1e 100644
--- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
+++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll
@@ -33,9 +33,9 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlq $32, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -73,9 +73,9 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE3-NEXT: por %xmm1, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlq $32, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $1, %xmm1
 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -311,9 +311,9 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
 ; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlq $32, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -351,9 +351,9 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind {
 ; SSE3-NEXT: por %xmm1, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlq $32, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $1, %xmm1
 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -586,9 +586,9 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -628,9 +628,9 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind {
 ; SSE3-NEXT: por %xmm1, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrld $16, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $1, %xmm1
 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -835,9 +835,9 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
 ; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrld $16, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -877,9 +877,9 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind {
 ; SSE3-NEXT: por %xmm1, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrld $16, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $1, %xmm1
 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1081,9 +1081,9 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
 ; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1117,9 +1117,9 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind {
 ; SSE3-NEXT: por %xmm1, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $8, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $1, %xmm1
 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1289,9 +1289,9 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
 ; SSE2-NEXT: por %xmm1, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $8, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pandn %xmm2, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; SSE2-NEXT: psrlw $1, %xmm1
 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1325,9 +1325,9 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind {
 ; SSE3-NEXT: por %xmm1, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $8, %xmm1
-; SSE3-NEXT: por %xmm1, %xmm0
-; SSE3-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE3-NEXT: pxor %xmm1, %xmm0
+; SSE3-NEXT: pcmpeqd %xmm2, %xmm2
+; SSE3-NEXT: pxor %xmm1, %xmm2
+; SSE3-NEXT: pandn %xmm2, %xmm0
 ; SSE3-NEXT: movdqa %xmm0, %xmm1
 ; SSE3-NEXT: psrlw $1, %xmm1
 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
@@ -1498,9 +1498,9 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind {
 ; SSE2-NEXT: psrlw $4, %xmm1
 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
 ; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: por %xmm1, %xmm0
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
-; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm3, %xmm3
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: pandn %xmm3, %xmm0
 ; SSE2-NEXT: movdqa %xmm0, %xmm1
 ; 
SSE2-NEXT: psrlw $1, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1531,9 +1531,9 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE3-NEXT: pand %xmm2, %xmm1 -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE3-NEXT: pxor %xmm1, %xmm0 +; SSE3-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE3-NEXT: pxor %xmm1, %xmm3 +; SSE3-NEXT: pandn %xmm3, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1661,9 +1661,9 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind { ; SSE2-NEXT: psrlw $4, %xmm1 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE2-NEXT: pxor %xmm1, %xmm3 +; SSE2-NEXT: pandn %xmm3, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1694,9 +1694,9 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind { ; SSE3-NEXT: psrlw $4, %xmm1 ; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] ; SSE3-NEXT: pand %xmm2, %xmm1 -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: pcmpeqd %xmm1, %xmm1 -; SSE3-NEXT: pxor %xmm1, %xmm0 +; SSE3-NEXT: pcmpeqd %xmm3, %xmm3 +; SSE3-NEXT: pxor %xmm1, %xmm3 +; SSE3-NEXT: pandn %xmm3, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll index d35a365508d54..8c24aa50a626e 100644 --- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll +++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll @@ -28,17 +28,17 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind { ; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1 +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1) +; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3 +; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3 +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1) +; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -56,28 +56,30 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind { ; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1 -; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512DQ-NEXT: vpandn %ymm1, %ymm0, %ymm2 -; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm4 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1 +; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1) +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1 +; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm4 ; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1] ; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4 -; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm3, %ymm5, %ymm3 -; AVX512DQ-NEXT: vpaddb %ymm4, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm4, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2 -; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpsrlq $32, %ymm0, %ymm6 +; AVX512DQ-NEXT: vpor %ymm6, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpandn %ymm3, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0 -; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2 +; AVX512DQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0) ret <8 x i64> %out @@ -107,17 +109,17 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind { ; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = 
[0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1 +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1) +; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3 +; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3 +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1) +; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -135,28 +137,30 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind { ; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1 -; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512DQ-NEXT: vpandn %ymm1, %ymm0, %ymm2 -; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm3 -; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm4 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1 +; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1) +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1 +; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm4 ; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1] ; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4 -; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm3, %ymm5, %ymm3 -; AVX512DQ-NEXT: vpaddb %ymm4, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm4, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2 -; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpsrlq $32, %ymm0, %ymm6 +; AVX512DQ-NEXT: vpor %ymm6, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpandn %ymm3, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0 -; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: 
vpshufb %ymm2, %ymm5, %ymm2 +; AVX512DQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0 ; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1) ret <8 x i64> %out @@ -184,17 +188,17 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1 +; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1) +; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3 +; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3 +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1) +; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 @@ -214,34 +218,35 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1 ; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512DQ-NEXT: vpandn %ymm0, %ymm1, %ymm2 -; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm1 = ~zmm1 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm3 -; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm4 -; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1] -; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1) +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm4 = 
[15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm5 +; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512DQ-NEXT: # ymm6 = mem[0,1,0,1] +; AVX512DQ-NEXT: vpshufb %ymm5, %ymm6, %ymm5 ; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm3, %ymm5, %ymm3 -; AVX512DQ-NEXT: vpaddb %ymm4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm6 = ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[6],ymm4[6],ymm3[7],ymm4[7] -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm6, %ymm6 -; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[4],ymm4[4],ymm3[5],ymm4[5] -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpackuswb %ymm6, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2 -; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpand %ymm0, %ymm1, %ymm0 -; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0 -; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm4[2],ymm0[3],ymm4[3],ymm0[6],ymm4[6],ymm0[7],ymm4[7] -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5] -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpshufb %ymm3, %ymm6, %ymm3 +; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7 +; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpandn %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0 +; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm1 +; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpshufb %ymm1, %ymm6, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq @@ -271,17 +276,17 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpandnq %zmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: # zmm3 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; AVX512BW-NEXT: vpshufb %zmm2, 
%zmm3, %zmm2 -; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm0 = ~zmm0 -; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0 -; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1 +; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1) +; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3 +; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3 +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1) +; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 @@ -301,34 +306,35 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1 ; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512DQ-NEXT: vpandn %ymm0, %ymm1, %ymm2 -; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm1 = ~zmm1 -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm1, %ymm3 -; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm4 -; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1] -; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1) +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm5 +; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512DQ-NEXT: # ymm6 = mem[0,1,0,1] +; AVX512DQ-NEXT: vpshufb %ymm5, %ymm6, %ymm5 ; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpand %ymm0, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm3, %ymm5, %ymm3 -; AVX512DQ-NEXT: vpaddb %ymm4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm6 = ymm3[2],ymm4[2],ymm3[3],ymm4[3],ymm3[6],ymm4[6],ymm3[7],ymm4[7] -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm6, %ymm6 -; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm4[0],ymm3[1],ymm4[1],ymm3[4],ymm4[4],ymm3[5],ymm4[5] -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpackuswb %ymm6, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2 -; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpand %ymm0, %ymm1, %ymm0 -; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0 -; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 -; 
AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm4[2],ymm0[3],ymm4[3],ymm0[6],ymm4[6],ymm0[7],ymm4[7] -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm4[0],ymm0[1],ymm4[1],ymm0[4],ymm4[4],ymm0[5],ymm4[5] -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpshufb %ymm3, %ymm6, %ymm3 +; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7 +; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpandn %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0 +; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm1 +; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpshufb %ymm1, %ymm6, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0 +; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll index a1b277efde6ff..1473da6aac5ea 100644 --- a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll @@ -20,9 +20,9 @@ define <2 x i32> @illegal_ctlz(<2 x i32> %v1) { ; CHECK-NEXT: por %xmm1, %xmm0 ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrld $16, %xmm1 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 -; CHECK-NEXT: pxor %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 +; CHECK-NEXT: pxor %xmm1, %xmm2 +; CHECK-NEXT: pandn %xmm2, %xmm0 ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrlw $1, %xmm1 ; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll index c1d30b6d5a995..d8e955c93581e 100644 --- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll @@ -826,11 +826,11 @@ define <2 x i64> @ne_1_v2i64(<2 x i64> %0) { ; SSE-NEXT: pcmpgtd %xmm2, %xmm3 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] ; SSE-NEXT: pcmpeqd %xmm2, %xmm0 -; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE-NEXT: pand %xmm4, %xmm2 -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] -; SSE-NEXT: por %xmm2, %xmm0 -; SSE-NEXT: pxor %xmm1, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE-NEXT: pand %xmm4, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] +; SSE-NEXT: pxor %xmm1, %xmm2 +; SSE-NEXT: pandn %xmm2, %xmm0 ; SSE-NEXT: retq ; ; AVX1OR2-LABEL: ne_1_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll index 97124f0a9d8d9..55f2258aad018 100644 --- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll @@ -117,9 +117,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = 
xmm1[1,1,3,3] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: pandn %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: uge_v2i64: @@ -136,9 +136,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: pxor %xmm1, %xmm2 +; SSE41-NEXT: pandn %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: uge_v2i64: @@ -170,9 +170,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE2-NEXT: por %xmm0, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm0 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: pandn %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: ule_v2i64: @@ -189,9 +189,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE41-NEXT: por %xmm0, %xmm1 -; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 -; SSE41-NEXT: pxor %xmm1, %xmm0 +; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE41-NEXT: pxor %xmm1, %xmm2 +; SSE41-NEXT: pandn %xmm2, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: ule_v2i64: diff --git a/llvm/test/CodeGen/X86/vsplit-and.ll b/llvm/test/CodeGen/X86/vsplit-and.ll index 833db0efbda89..90bbde645cd08 100644 --- a/llvm/test/CodeGen/X86/vsplit-and.ll +++ b/llvm/test/CodeGen/X86/vsplit-and.ll @@ -7,9 +7,9 @@ define void @t0(ptr %dst, <2 x i64> %src1, <2 x i64> %src2) nounwind readonly { ; CHECK-NEXT: pxor %xmm2, %xmm2 ; CHECK-NEXT: pcmpeqq %xmm2, %xmm0 ; CHECK-NEXT: pcmpeqq %xmm2, %xmm1 -; CHECK-NEXT: por %xmm0, %xmm1 -; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 -; CHECK-NEXT: pxor %xmm1, %xmm0 +; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 +; CHECK-NEXT: pxor %xmm1, %xmm2 +; CHECK-NEXT: pandn %xmm2, %xmm0 ; CHECK-NEXT: movdqa %xmm0, (%rdi) ; CHECK-NEXT: retq %cmp1 = icmp ne <2 x i64> %src1, zeroinitializer @@ -32,19 +32,19 @@ define void @t2(ptr %dst, <3 x i64> %src1, <3 x i64> %src2) nounwind readonly { ; CHECK-NEXT: movq %rcx, %xmm0 ; CHECK-NEXT: movq {{.*#+}} xmm3 = mem[0],zero ; CHECK-NEXT: pxor %xmm4, %xmm4 -; CHECK-NEXT: pcmpeqq %xmm4, %xmm2 ; CHECK-NEXT: pcmpeqq %xmm4, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm5, %xmm5 -; CHECK-NEXT: pcmpeqq %xmm4, %xmm1 -; CHECK-NEXT: por %xmm2, %xmm1 +; CHECK-NEXT: pcmpeqq %xmm4, %xmm2 +; CHECK-NEXT: packssdw %xmm0, %xmm2 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 ; CHECK-NEXT: pcmpeqq %xmm4, %xmm3 -; CHECK-NEXT: por %xmm0, %xmm3 +; CHECK-NEXT: pcmpeqq %xmm4, %xmm1 ; CHECK-NEXT: packssdw %xmm3, %xmm1 -; CHECK-NEXT: pxor %xmm5, %xmm1 -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,2,2] +; CHECK-NEXT: pxor %xmm0, %xmm1 +; CHECK-NEXT: pandn %xmm1, %xmm2 +; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,2,2,2] ; CHECK-NEXT: pslld $31, %xmm0 ; CHECK-NEXT: psrad $31, %xmm0 -; CHECK-NEXT: pmovsxdq %xmm1, %xmm1 +; CHECK-NEXT: pmovsxdq %xmm2, %xmm1 ; CHECK-NEXT: movdqa %xmm1, (%rdi) ; CHECK-NEXT: movq %xmm0, 16(%rdi) ; CHECK-NEXT: retq From e2032efe50c3c421c98575d822442dfde65dab71 Mon Sep 17 
00:00:00 2001 From: Kevin Per Date: Sat, 18 Oct 2025 06:56:43 +0000 Subject: [PATCH 15/20] [PowerPC]: Updated tests --- llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll | 1 + .../CodeGen/PowerPC/fp-strict-fcmp-spe.ll | 24 ++-- .../CodeGen/PowerPC/vec_veqv_vnand_vorc.ll | 19 ++- llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll | 65 +++++++++ .../CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll | 19 ++- .../CodeGen/PowerPC/xxeval-vselect-x-and.ll | 19 ++- .../CodeGen/PowerPC/xxeval-vselect-x-b.ll | 15 ++- .../CodeGen/PowerPC/xxeval-vselect-x-c.ll | 15 ++- .../CodeGen/PowerPC/xxeval-vselect-x-eqv.ll | 19 ++- .../CodeGen/PowerPC/xxeval-vselect-x-nor.ll | 125 ++++++++++++++---- .../CodeGen/PowerPC/xxeval-vselect-x-xor.ll | 19 ++- 11 files changed, 268 insertions(+), 72 deletions(-) diff --git a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll index bea24ee98336d..ed8dc504f026a 100644 --- a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll +++ b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | \ ; RUN: grep eqv | count 3 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | \ diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll index c20d319f2ac79..78644691fb646 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll @@ -113,14 +113,12 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 { define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; SPE-LABEL: test_f32_ueq_s: ; SPE: # %bb.0: -; SPE-NEXT: efscmplt cr0, r5, r6 -; SPE-NEXT: bc 12, gt, .LBB7_3 -; SPE-NEXT: # %bb.1: ; SPE-NEXT: efscmpgt cr0, r5, r6 -; SPE-NEXT: bc 12, gt, .LBB7_3 -; SPE-NEXT: # %bb.2: -; SPE-NEXT: mr r4, r3 -; SPE-NEXT: .LBB7_3: +; SPE-NEXT: bc 12, gt, .LBB7_2 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: efscmplt cr0, r5, r6 +; SPE-NEXT: bclr 4, gt, 0 +; SPE-NEXT: .LBB7_2: ; SPE-NEXT: mr r3, r4 ; SPE-NEXT: blr %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ueq", metadata !"fpexcept.strict") #0 @@ -355,14 +353,12 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; SPE: # %bb.0: ; SPE-NEXT: evmergelo r7, r7, r8 ; SPE-NEXT: evmergelo r5, r5, r6 -; SPE-NEXT: efdcmplt cr0, r5, r7 -; SPE-NEXT: bc 12, gt, .LBB21_3 -; SPE-NEXT: # %bb.1: ; SPE-NEXT: efdcmpgt cr0, r5, r7 -; SPE-NEXT: bc 12, gt, .LBB21_3 -; SPE-NEXT: # %bb.2: -; SPE-NEXT: mr r4, r3 -; SPE-NEXT: .LBB21_3: +; SPE-NEXT: bc 12, gt, .LBB21_2 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: efdcmplt cr0, r5, r7 +; SPE-NEXT: bclr 4, gt, 0 +; SPE-NEXT: .LBB21_2: ; SPE-NEXT: mr r3, r4 ; SPE-NEXT: blr %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ueq", metadata !"fpexcept.strict") #0 diff --git a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll index c23daac80279b..872a08c20eae8 100644 --- a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll +++ b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll @@ -1,29 +1,38 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; Check the miscellaneous logical vector operations added in P8 -; +; ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | 
FileCheck %s ; Test x eqv y define <4 x i32> @test_veqv(<4 x i32> %x, <4 x i32> %y) nounwind { +; CHECK-LABEL: test_veqv: +; CHECK: # %bb.0: +; CHECK-NEXT: veqv 2, 2, 3 +; CHECK-NEXT: blr %tmp = xor <4 x i32> %x, %y %ret_val = xor <4 x i32> %tmp, < i32 -1, i32 -1, i32 -1, i32 -1> ret <4 x i32> %ret_val -; CHECK: veqv 2, 2, 3 } ; Test x vnand y define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind { +; CHECK-LABEL: test_vnand: +; CHECK: # %bb.0: +; CHECK-NEXT: vnand 2, 2, 3 +; CHECK-NEXT: blr %tmp = and <4 x i32> %x, %y %ret_val = xor <4 x i32> %tmp, ret <4 x i32> %ret_val -; CHECK: vnand 2, 2, 3 } ; Test x vorc y and variants define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind { +; CHECK-LABEL: test_vorc: +; CHECK: # %bb.0: +; CHECK-NEXT: vor 2, 3, 2 +; CHECK-NEXT: blr %tmp1 = xor <4 x i32> %y, %tmp2 = or <4 x i32> %x, %tmp1 -; CHECK: vorc 3, 2, 3 %tmp3 = xor <4 x i32> %tmp2, %tmp4 = or <4 x i32> %tmp3, %x -; CHECK: vorc 2, 2, 3 ret <4 x i32> %tmp4 } diff --git a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll index ba74df956e71e..7f7a52fe7de65 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s @@ -6,6 +7,10 @@ ; CHECK: xxlandc v2, v2, v3 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_not(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_not: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxlandc v2, v2, v3 +; CHECK-NEXT: blr entry: %neg = xor <4 x i32> %B, %and = and <4 x i32> %neg, %A @@ -17,6 +22,10 @@ entry: ; CHECK: xxeval v2, v3, v2, v4, 1 ; CHECK-NEXT: blr define dso_local <16 x i8> @and_and8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_and8: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v3, v2, v4, 1 +; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %A %and1 = and <16 x i8> %and, %C @@ -28,6 +37,10 @@ entry: ; CHECK: xxeval v2, v3, v2, v4, 1 ; CHECK-NEXT: blr define dso_local <8 x i16> @and_and16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_and16: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v3, v2, v4, 1 +; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %A %and1 = and <8 x i16> %and, %C @@ -39,6 +52,10 @@ entry: ; CHECK: xxeval v2, v3, v2, v4, 1 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_and32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_and32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v3, v2, v4, 1 +; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %A %and1 = and <4 x i32> %and, %C @@ -50,6 +67,10 @@ entry: ; CHECK: xxeval v2, v3, v2, v4, 1 ; CHECK-NEXT: blr define dso_local <2 x i64> @and_and64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_and64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v3, v2, v4, 1 +; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %A %and1 = and <2 x i64> %and, %C @@ -61,6 +82,10 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 14 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_nand: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, 
v2, v4, v3, 14 +; CHECK-NEXT: blr entry: %and = and <4 x i32> %C, %B %neg = xor <4 x i32> %and, @@ -73,6 +98,10 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 7 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_or: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v2, v4, v3, 7 +; CHECK-NEXT: blr entry: %or = or <4 x i32> %C, %B %and = and <4 x i32> %or, %A @@ -84,6 +113,10 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 8 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_nor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v2, v4, v3, 8 +; CHECK-NEXT: blr entry: %or = or <4 x i32> %C, %B %neg = xor <4 x i32> %or, @@ -96,6 +129,10 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 6 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_xor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v2, v4, v3, 6 +; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %C, %B %and = and <4 x i32> %xor, %A @@ -107,6 +144,10 @@ entry: ; CHECK: xxeval v2, v2, v3, v4, 9 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: and_eqv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v2, v3, v4, 9 +; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %neg = xor <4 x i32> %xor, %C @@ -119,6 +160,10 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 241 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: nand_nand: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v2, v4, v3, 241 +; CHECK-NEXT: blr entry: %and = and <4 x i32> %C, %B %A.not = xor <4 x i32> %A, @@ -131,6 +176,10 @@ entry: ; CHECK: xxeval v2, v3, v2, v4, 254 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_and(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: nand_and: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v3, v2, v4, 254 +; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %A %and1 = and <4 x i32> %and, %C @@ -143,6 +192,10 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 249 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: nand_xor: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v2, v4, v3, 249 +; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %C, %B %and = and <4 x i32> %xor, %A @@ -155,6 +208,10 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 246 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: nand_eqv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v2, v4, v3, 246 +; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %C, %B %A.not = xor <4 x i32> %A, @@ -167,6 +224,10 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 248 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: nand_or: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xxeval v2, v2, v4, v3, 248 +; CHECK-NEXT: blr entry: %or = or <4 x i32> %C, %B %and = and <4 x i32> %or, %A @@ -179,6 +240,10 @@ entry: ; CHECK: xxeval v2, v2, v3, v4, 247 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { +; CHECK-LABEL: nand_nor: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: xxeval v2, v2, v3, v4, 247 +; CHECK-NEXT: blr entry: %A.not = xor <4 x i32> %A, %or = or <4 x i32> %A.not, %B diff --git a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll index 6616a1e6e7e9f..ba5c9edb3897d 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll @@ -32,7 +32,10 @@ entry: define dso_local <8 x i16> @eqvA_B_C(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) local_unnamed_addr #0 { ; CHECK-LABEL: eqvA_B_C: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v3, v4, 150 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -48,7 +51,8 @@ entry: define dso_local <16 x i8> @norA_andB_C(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 { ; CHECK-LABEL: norA_andB_C: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v3, v4, 224 +; CHECK-NEXT: xxlnor vs0, v2, v2 +; CHECK-NEXT: xxeval v2, vs0, v3, v4, 14 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -100,7 +104,8 @@ entry: define dso_local <4 x i32> @norA_xorB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { ; CHECK-LABEL: norA_xorB_C: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v3, v4, 144 +; CHECK-NEXT: xxlnor vs0, v2, v2 +; CHECK-NEXT: xxeval v2, vs0, v3, v4, 9 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -113,7 +118,9 @@ entry: define dso_local <4 x i32> @norA_B_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { ; CHECK-LABEL: norA_B_C: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v3, v4, 128 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: xxeval v2, v2, vs1, vs0, 16 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -164,7 +171,9 @@ entry: define dso_local <4 x i32> @orA_norB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { ; CHECK-LABEL: orA_norB_C: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v3, v4, 143 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: xxeval v2, v2, vs1, vs0, 31 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll index b41220b01373a..f98edc21bf2ea 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll @@ -80,9 +80,11 @@ define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -97,10 +99,13 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 +; CHECK-NEXT: xxsel v2, 
vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -115,9 +120,12 @@ define <16 x i8> @ternary_A_nor_BC_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_and_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxland vs1, v3, v4 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -132,9 +140,12 @@ define <8 x i16> @ternary_A_nor_BC_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_and_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxland vs1, v3, v4 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll index a51e392279d55..0baa420b79761 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll @@ -77,8 +77,9 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -92,10 +93,12 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_nor_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -109,9 +112,11 @@ define <16 x i8> @ternary_A_nor_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nor_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -125,9 +130,11 @@ define <8 x i16> @ternary_A_nor_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_nor_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 +; CHECK-NEXT: xxsel v2, v3, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll index 54bf6c03f8c1a..6fc822d729457 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll @@ -77,8 +77,9 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: xxleqv v5, v5, v5 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -92,10 +93,12 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_nor_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -109,9 +112,11 @@ define <16 x i8> @ternary_A_nor_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nor_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -125,9 +130,11 @@ define <8 x i16> @ternary_A_nor_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_nor_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 +; CHECK-NEXT: xxsel v2, v4, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll index ba7680b27cc17..78ae36cc0ecf7 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll @@ -84,9 +84,11 @@ define <4 x i32> @ternary_A_nor_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -102,10 +104,13 @@ define <2 x i64> @ternary_A_nor_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -121,9 +126,12 @@ define <16 x i8> @ternary_A_nor_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -139,9 +147,12 @@ define <8 x i16> 
@ternary_A_nor_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxleqv vs1, v3, v4 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll index 369587454a7c1..90928e668afd8 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll @@ -15,9 +15,11 @@ define <4 x i32> @ternary_A_and_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -32,10 +34,13 @@ define <2 x i64> @ternary_A_and_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -50,9 +55,12 @@ define <16 x i8> @ternary_A_and_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -67,9 +75,12 @@ define <8 x i16> @ternary_A_and_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxland vs0, v3, v4 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -85,8 +96,9 @@ define <4 x i32> @ternary_A_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -100,10 +112,12 @@ define <2 x i64> @ternary_A_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_B_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, 
v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -117,9 +131,11 @@ define <16 x i8> @ternary_A_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_B_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -133,9 +149,11 @@ define <8 x i16> @ternary_A_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_B_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 +; CHECK-NEXT: xxsel v2, vs0, v3, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -150,8 +168,9 @@ define <4 x i32> @ternary_A_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -165,10 +184,12 @@ define <2 x i64> @ternary_A_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_C_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -182,9 +203,11 @@ define <16 x i8> @ternary_A_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_C_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -198,9 +221,11 @@ define <8 x i16> @ternary_A_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_C_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 +; CHECK-NEXT: xxsel v2, vs0, v4, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -214,9 +239,11 @@ define <4 x i32> @ternary_A_xor_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -231,10 +258,13 @@ define <2 x i64> @ternary_A_xor_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_2x64: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -249,9 +279,12 @@ define <16 x i8> @ternary_A_xor_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -266,9 +299,12 @@ define <8 x i16> @ternary_A_xor_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlxor vs0, v3, v4 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -283,9 +319,11 @@ define <4 x i32> @ternary_A_not_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_C_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -300,10 +338,13 @@ define <2 x i64> @ternary_A_not_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_C_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation @@ -318,9 +359,12 @@ define <16 x i8> @ternary_A_not_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_C_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxlnor vs1, v4, v4 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 +; CHECK-NEXT: xxsel v2, vs0, vs1, v2 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -335,9 +379,12 @@ define <8 x i16> @ternary_A_not_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_C_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlnor vs0, v4, v4 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -352,9 
+399,11 @@ define <4 x i32> @ternary_A_not_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_B_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnor vs0, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -369,10 +418,13 @@ define <2 x i64> @ternary_A_not_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_B_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlnor vs0, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %B, <i64 -1, i64 -1> ; Vector not operation @@ -387,9 +439,12 @@ define <16 x i8> @ternary_A_not_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_B_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxlnor vs1, v3, v3 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 +; CHECK-NEXT: xxsel v2, vs0, vs1, v2 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %B, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -404,9 +459,12 @@ define <8 x i16> @ternary_A_not_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_B_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlnor vs0, v3, v3 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %B, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -421,9 +479,11 @@ define <4 x i32> @ternary_A_nand_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -439,10 +499,13 @@ define <2 x i64> @ternary_A_nand_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -458,9 +521,12 @@ define <16 x i8> @ternary_A_nand_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, 
v3, v4, 142 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -476,9 +542,12 @@ define <8 x i16> @ternary_A_nand_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs1, vs1, vs1 +; CHECK-NEXT: xxlnand vs0, v3, v4 +; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll index 0fc296cc5a4e2..5031ebc930e11 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll @@ -267,9 +267,11 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 +; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -284,10 +286,13 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -302,9 +307,12 @@ define <16 x i8> @ternary_A_nor_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxlxor vs1, v3, v4 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -319,9 +327,12 @@ define <8 x i16> @ternary_A_nor_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 +; CHECK-NEXT: xxleqv vs0, vs0, vs0 +; CHECK-NEXT: xxlxor vs1, v3, v4 +; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 +; CHECK-NEXT: xxsel v2, vs1, vs0, v2 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C From 52185882fc9f24090c54db0649e4121320592de1 Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Sat, 18 Oct 2025 07:33:47 +0000 Subject: [PATCH 16/20] [X86]: Removed the ANDNP fold that reverses the De Morgan rewrite --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a0b64ff370b10..e870514db2443 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -55615,10 +55615,12 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, // Folds for better
commutativity: if (N1->hasOneUse()) { + /* // ANDNP(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)). if (SDValue Not = IsNOT(N1, DAG)) return DAG.getNOT( DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT); + */ // ANDNP(x,PSHUFB(y,z)) -> PSHUFB(y,OR(z,x)) // Zero out elements by setting the PSHUFB mask value to 0xFF. From de10f4a6eeae333b8e0972ff499f09015b72c203 Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Sat, 18 Oct 2025 07:34:18 +0000 Subject: [PATCH 17/20] [X86][PowerPC][AArch64]: Updated tests --- llvm/test/CodeGen/AArch64/bsl.ll | 120 ++++---- .../CodeGen/AArch64/build-vector-dup-simd.ll | 24 +- .../CodeGen/AArch64/fp16-v4-instructions.ll | 44 +-- .../CodeGen/AArch64/fp16-v8-instructions.ll | 50 +--- llvm/test/CodeGen/AArch64/sve2-bsl.ll | 36 +-- .../CodeGen/PowerPC/fp-strict-fcmp-spe.ll | 24 +- .../CodeGen/PowerPC/vec_veqv_vnand_vorc.ll | 3 +- .../CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll | 10 +- .../CodeGen/PowerPC/xxeval-vselect-x-and.ll | 19 +- .../CodeGen/PowerPC/xxeval-vselect-x-b.ll | 15 +- .../CodeGen/PowerPC/xxeval-vselect-x-c.ll | 15 +- .../CodeGen/PowerPC/xxeval-vselect-x-eqv.ll | 19 +- .../CodeGen/PowerPC/xxeval-vselect-x-nor.ll | 125 ++------ .../CodeGen/PowerPC/xxeval-vselect-x-xor.ll | 19 +- llvm/test/CodeGen/X86/abds-vector-128.ll | 6 +- .../test/CodeGen/X86/avx512-mask-bit-manip.ll | 25 +- llvm/test/CodeGen/X86/combine-or.ll | 39 +-- llvm/test/CodeGen/X86/combine-srl.ll | 9 +- .../CodeGen/X86/expand-vp-int-intrinsics.ll | 9 +- llvm/test/CodeGen/X86/ispow2.ll | 24 +- llvm/test/CodeGen/X86/machine-cp.ll | 67 +++-- llvm/test/CodeGen/X86/promote-cmp.ll | 34 +-- llvm/test/CodeGen/X86/setcc-combine.ll | 6 +- .../X86/urem-seteq-vec-tautological.ll | 12 +- llvm/test/CodeGen/X86/vec_cmp_sint-128.ll | 24 +- llvm/test/CodeGen/X86/vec_cmp_uint-128.ll | 24 +- llvm/test/CodeGen/X86/vec_compare.ll | 24 +- llvm/test/CodeGen/X86/vec_ctbits.ll | 18 +- llvm/test/CodeGen/X86/vec_setcc-2.ll | 13 +- llvm/test/CodeGen/X86/vector-lzcnt-128.ll | 248 ++++++++-------- llvm/test/CodeGen/X86/vector-lzcnt-512.ll | 276 +++++++++--------- llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll | 9 +- llvm/test/CodeGen/X86/vector-popcnt-128.ll | 10 +- llvm/test/CodeGen/X86/vector-unsigned-cmp.ll | 24 +- 34 files changed, 627 insertions(+), 797 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/bsl.ll b/llvm/test/CodeGen/AArch64/bsl.ll index fcf7393d2c801..df6b6f75b8935 100644 --- a/llvm/test/CodeGen/AArch64/bsl.ll +++ b/llvm/test/CodeGen/AArch64/bsl.ll @@ -32,19 +32,17 @@ define <1 x i64> @bsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { define <1 x i64> @nbsl_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { ; NEON-LABEL: nbsl_v1i64: ; NEON: // %bb.0: -; NEON-NEXT: and v0.8b, v2.8b, v0.8b -; NEON-NEXT: bic v1.8b, v1.8b, v2.8b +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b ; NEON-NEXT: mvn v0.8b, v0.8b -; NEON-NEXT: bic v0.8b, v0.8b, v1.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v1i64: ; SVE2: // %bb.0: ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 -; SVE2-NEXT: bic v1.8b, v1.8b, v2.8b -; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d -; SVE2-NEXT: bic v0.8b, v0.8b, v1.8b +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret %4 = and <1 x i64> %2, %0 %5 = xor <1 x i64> %2, splat (i64 -1) @@ -80,8 +78,9 @@ define <1 x i64> @bsl1n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) { define <1 x i64> @bsl2n_v1i64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) 
{ ; NEON-LABEL: bsl2n_v1i64: ; NEON: // %bb.0: -; NEON-NEXT: mvn v1.8b, v1.8b -; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: and v0.8b, v2.8b, v0.8b +; NEON-NEXT: orr v1.8b, v2.8b, v1.8b +; NEON-NEXT: orn v0.8b, v0.8b, v1.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: bsl2n_v1i64: @@ -119,19 +118,17 @@ define <2 x i64> @bsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { define <2 x i64> @nbsl_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { ; NEON-LABEL: nbsl_v2i64: ; NEON: // %bb.0: -; NEON-NEXT: and v0.16b, v2.16b, v0.16b -; NEON-NEXT: bic v1.16b, v1.16b, v2.16b +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b ; NEON-NEXT: mvn v0.16b, v0.16b -; NEON-NEXT: bic v0.16b, v0.16b, v1.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v2i64: ; SVE2: // %bb.0: ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 -; SVE2-NEXT: bic v1.16b, v1.16b, v2.16b -; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d -; SVE2-NEXT: bic v0.16b, v0.16b, v1.16b +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret %4 = and <2 x i64> %2, %0 %5 = xor <2 x i64> %2, splat (i64 -1) @@ -167,8 +164,9 @@ define <2 x i64> @bsl1n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { ; NEON-LABEL: bsl2n_v2i64: ; NEON: // %bb.0: -; NEON-NEXT: mvn v1.16b, v1.16b -; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: and v0.16b, v2.16b, v0.16b +; NEON-NEXT: orr v1.16b, v2.16b, v1.16b +; NEON-NEXT: orn v0.16b, v0.16b, v1.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: bsl2n_v2i64: @@ -191,18 +189,17 @@ define <2 x i64> @bsl2n_v2i64(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) { define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) { ; NEON-LABEL: nbsl_v8i8: ; NEON: // %bb.0: -; NEON-NEXT: and v3.8b, v2.8b, v1.8b -; NEON-NEXT: and v0.8b, v2.8b, v0.8b -; NEON-NEXT: orn v1.8b, v3.8b, v1.8b -; NEON-NEXT: bic v0.8b, v1.8b, v0.8b +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: mvn v0.8b, v0.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v8i8: ; SVE2: // %bb.0: -; SVE2-NEXT: and v3.8b, v2.8b, v1.8b -; SVE2-NEXT: and v0.8b, v2.8b, v0.8b -; SVE2-NEXT: orn v1.8b, v3.8b, v1.8b -; SVE2-NEXT: bic v0.8b, v1.8b, v0.8b +; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 +; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret %4 = and <8 x i8> %2, %0 %5 = xor <8 x i8> %2, splat (i8 -1) @@ -215,18 +212,17 @@ define <8 x i8> @nbsl_v8i8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) { define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) { ; NEON-LABEL: nbsl_v4i16: ; NEON: // %bb.0: -; NEON-NEXT: and v3.8b, v2.8b, v1.8b -; NEON-NEXT: and v0.8b, v2.8b, v0.8b -; NEON-NEXT: orn v1.8b, v3.8b, v1.8b -; NEON-NEXT: bic v0.8b, v1.8b, v0.8b +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b +; NEON-NEXT: mvn v0.8b, v0.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v4i16: ; SVE2: // %bb.0: -; SVE2-NEXT: and v3.8b, v2.8b, v1.8b -; SVE2-NEXT: and v0.8b, v2.8b, v0.8b -; SVE2-NEXT: orn v1.8b, v3.8b, v1.8b -; SVE2-NEXT: bic v0.8b, v1.8b, v0.8b +; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 +; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret %4 = and <4 x i16> %2, %0 %5 = xor 
<4 x i16> %2, splat (i16 -1) @@ -239,19 +235,17 @@ define <4 x i16> @nbsl_v4i16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) { define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) { ; NEON-LABEL: nbsl_v2i32: ; NEON: // %bb.0: -; NEON-NEXT: and v0.8b, v2.8b, v0.8b -; NEON-NEXT: bic v1.8b, v1.8b, v2.8b +; NEON-NEXT: bif v0.8b, v1.8b, v2.8b ; NEON-NEXT: mvn v0.8b, v0.8b -; NEON-NEXT: bic v0.8b, v0.8b, v1.8b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v2i32: ; SVE2: // %bb.0: ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2 -; SVE2-NEXT: bic v1.8b, v1.8b, v2.8b -; SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d -; SVE2-NEXT: bic v0.8b, v0.8b, v1.8b +; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 ; SVE2-NEXT: ret %4 = and <2 x i32> %2, %0 %5 = xor <2 x i32> %2, splat (i32 -1) @@ -264,18 +258,17 @@ define <2 x i32> @nbsl_v2i32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) { define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { ; NEON-LABEL: nbsl_v16i8: ; NEON: // %bb.0: -; NEON-NEXT: and v3.16b, v2.16b, v1.16b -; NEON-NEXT: and v0.16b, v2.16b, v0.16b -; NEON-NEXT: orn v1.16b, v3.16b, v1.16b -; NEON-NEXT: bic v0.16b, v1.16b, v0.16b +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: mvn v0.16b, v0.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v16i8: ; SVE2: // %bb.0: -; SVE2-NEXT: and v3.16b, v2.16b, v1.16b -; SVE2-NEXT: and v0.16b, v2.16b, v0.16b -; SVE2-NEXT: orn v1.16b, v3.16b, v1.16b -; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret %4 = and <16 x i8> %2, %0 %5 = xor <16 x i8> %2, splat (i8 -1) @@ -288,18 +281,17 @@ define <16 x i8> @nbsl_v16i8(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) { define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { ; NEON-LABEL: nbsl_v8i16: ; NEON: // %bb.0: -; NEON-NEXT: and v3.16b, v2.16b, v1.16b -; NEON-NEXT: and v0.16b, v2.16b, v0.16b -; NEON-NEXT: orn v1.16b, v3.16b, v1.16b -; NEON-NEXT: bic v0.16b, v1.16b, v0.16b +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b +; NEON-NEXT: mvn v0.16b, v0.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v8i16: ; SVE2: // %bb.0: -; SVE2-NEXT: and v3.16b, v2.16b, v1.16b -; SVE2-NEXT: and v0.16b, v2.16b, v0.16b -; SVE2-NEXT: orn v1.16b, v3.16b, v1.16b -; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret %4 = and <8 x i16> %2, %0 %5 = xor <8 x i16> %2, splat (i16 -1) @@ -312,19 +304,17 @@ define <8 x i16> @nbsl_v8i16(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) { define <4 x i32> @nbsl_v4i32(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) { ; NEON-LABEL: nbsl_v4i32: ; NEON: // %bb.0: -; NEON-NEXT: and v0.16b, v2.16b, v0.16b -; NEON-NEXT: bic v1.16b, v1.16b, v2.16b +; NEON-NEXT: bif v0.16b, v1.16b, v2.16b ; NEON-NEXT: mvn v0.16b, v0.16b -; NEON-NEXT: bic v0.16b, v0.16b, v1.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nbsl_v4i32: ; SVE2: // %bb.0: ; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE2-NEXT: // kill: def $q2 killed $q2 def $z2 -; SVE2-NEXT: bic v1.16b, v1.16b, v2.16b -; 
SVE2-NEXT: nbsl z0.d, z0.d, z2.d, z2.d -; SVE2-NEXT: bic v0.16b, v0.16b, v1.16b +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z2.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret %4 = and <4 x i32> %2, %0 %5 = xor <4 x i32> %2, splat (i32 -1) @@ -481,14 +471,16 @@ define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 { define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 { ; NEON-LABEL: nor_q: ; NEON: // %bb.0: -; NEON-NEXT: mvn v1.16b, v1.16b -; NEON-NEXT: bic v0.16b, v1.16b, v0.16b +; NEON-NEXT: orr v0.16b, v1.16b, v0.16b +; NEON-NEXT: mvn v0.16b, v0.16b ; NEON-NEXT: ret ; ; SVE2-LABEL: nor_q: ; SVE2: // %bb.0: -; SVE2-NEXT: mvn v1.16b, v1.16b -; SVE2-NEXT: bic v0.16b, v1.16b, v0.16b +; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 +; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 +; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z0.d +; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 ; SVE2-NEXT: ret %3 = or <2 x i64> %1, %0 %4 = xor <2 x i64> %3, splat (i64 -1) diff --git a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll index af7f9b6d471ad..ac0b8e89519dd 100644 --- a/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-dup-simd.ll @@ -117,10 +117,10 @@ entry: define <1 x float> @dup_v1i32_ueq(float %a, float %b) { ; CHECK-NOFULLFP16-LABEL: dup_v1i32_ueq: ; CHECK-NOFULLFP16: // %bb.0: // %entry -; CHECK-NOFULLFP16-NEXT: fcmgt s2, s1, s0 -; CHECK-NOFULLFP16-NEXT: fcmgt s0, s0, s1 -; CHECK-NOFULLFP16-NEXT: mvn v1.8b, v2.8b -; CHECK-NOFULLFP16-NEXT: bic v0.8b, v1.8b, v0.8b +; CHECK-NOFULLFP16-NEXT: fcmgt s2, s0, s1 +; CHECK-NOFULLFP16-NEXT: fcmgt s0, s1, s0 +; CHECK-NOFULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NOFULLFP16-NEXT: mvn v0.8b, v0.8b ; CHECK-NOFULLFP16-NEXT: ret ; ; CHECK-NONANS-LABEL: dup_v1i32_ueq: @@ -130,10 +130,10 @@ define <1 x float> @dup_v1i32_ueq(float %a, float %b) { ; ; CHECK-FULLFP16-LABEL: dup_v1i32_ueq: ; CHECK-FULLFP16: // %bb.0: // %entry -; CHECK-FULLFP16-NEXT: fcmgt s2, s1, s0 -; CHECK-FULLFP16-NEXT: fcmgt s0, s0, s1 -; CHECK-FULLFP16-NEXT: mvn v1.8b, v2.8b -; CHECK-FULLFP16-NEXT: bic v0.8b, v1.8b, v0.8b +; CHECK-FULLFP16-NEXT: fcmgt s2, s0, s1 +; CHECK-FULLFP16-NEXT: fcmgt s0, s1, s0 +; CHECK-FULLFP16-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-FULLFP16-NEXT: mvn v0.8b, v0.8b ; CHECK-FULLFP16-NEXT: ret entry: %0 = fcmp ueq float %a, %b @@ -260,10 +260,10 @@ entry: define <1 x float> @dup_v1i32_uno(float %a, float %b) { ; CHECK-LABEL: dup_v1i32_uno: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcmgt s2, s1, s0 -; CHECK-NEXT: fcmge s0, s0, s1 -; CHECK-NEXT: mvn v1.8b, v2.8b -; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b +; CHECK-NEXT: fcmge s2, s0, s1 +; CHECK-NEXT: fcmgt s0, s1, s0 +; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: ret entry: %0 = fcmp uno float %a, %b diff --git a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll index 529b76cf84906..6233ce743b706 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -563,13 +563,13 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: test_fcmp_ueq: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: fcmgt v2.4h, v1.4h, v0.4h -; CHECK-FP16-SD-NEXT: fcmgt v0.4h, v0.4h, v1.4h -; CHECK-FP16-SD-NEXT: mvn 
v1.8b, v2.8b -; CHECK-FP16-SD-NEXT: bic v0.8b, v1.8b, v0.8b -; CHECK-FP16-SD-NEXT: ret +; CHECK-FP16-LABEL: test_fcmp_ueq: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmgt v2.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h +; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-FP16-NEXT: mvn v0.8b, v0.8b +; CHECK-FP16-NEXT: ret ; ; CHECK-CVT-GI-LABEL: test_fcmp_ueq: ; CHECK-CVT-GI: // %bb.0: @@ -581,14 +581,6 @@ define <4 x i1> @test_fcmp_ueq(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s ; CHECK-CVT-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: test_fcmp_ueq: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: fcmgt v2.4h, v0.4h, v1.4h -; CHECK-FP16-GI-NEXT: fcmgt v0.4h, v1.4h, v0.4h -; CHECK-FP16-GI-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-FP16-GI-NEXT: mvn v0.8b, v0.8b -; CHECK-FP16-GI-NEXT: ret %1 = fcmp ueq <4 x half> %a, %b ret <4 x i1> %1 @@ -722,13 +714,13 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-SD-NEXT: mvn v0.8b, v0.8b ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: test_fcmp_uno: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: fcmgt v2.4h, v1.4h, v0.4h -; CHECK-FP16-SD-NEXT: fcmge v0.4h, v0.4h, v1.4h -; CHECK-FP16-SD-NEXT: mvn v1.8b, v2.8b -; CHECK-FP16-SD-NEXT: bic v0.8b, v1.8b, v0.8b -; CHECK-FP16-SD-NEXT: ret +; CHECK-FP16-LABEL: test_fcmp_uno: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmge v2.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: fcmgt v0.4h, v1.4h, v0.4h +; CHECK-FP16-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-FP16-NEXT: mvn v0.8b, v0.8b +; CHECK-FP16-NEXT: ret ; ; CHECK-CVT-GI-LABEL: test_fcmp_uno: ; CHECK-CVT-GI: // %bb.0: @@ -740,14 +732,6 @@ define <4 x i1> @test_fcmp_uno(<4 x half> %a, <4 x half> %b) #0 { ; CHECK-CVT-GI-NEXT: mvn v0.16b, v0.16b ; CHECK-CVT-GI-NEXT: xtn v0.4h, v0.4s ; CHECK-CVT-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: test_fcmp_uno: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: fcmge v2.4h, v0.4h, v1.4h -; CHECK-FP16-GI-NEXT: fcmgt v0.4h, v1.4h, v0.4h -; CHECK-FP16-GI-NEXT: orr v0.8b, v0.8b, v2.8b -; CHECK-FP16-GI-NEXT: mvn v0.8b, v0.8b -; CHECK-FP16-GI-NEXT: ret %1 = fcmp uno <4 x half> %a, %b ret <4 x i1> %1 diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll index 6d67fc9ebe1c6..86763eb5f9e3b 100644 --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -990,14 +990,14 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: test_fcmp_ueq: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: fcmgt v2.8h, v1.8h, v0.8h -; CHECK-FP16-SD-NEXT: fcmgt v0.8h, v0.8h, v1.8h -; CHECK-FP16-SD-NEXT: mvn v1.16b, v2.16b -; CHECK-FP16-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-FP16-SD-NEXT: xtn v0.8b, v0.8h -; CHECK-FP16-SD-NEXT: ret +; CHECK-FP16-LABEL: test_fcmp_ueq: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmgt v2.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h +; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-FP16-NEXT: mvn v0.16b, v0.16b +; CHECK-FP16-NEXT: xtn v0.8b, v0.8h +; CHECK-FP16-NEXT: ret ; ; CHECK-CVT-GI-LABEL: test_fcmp_ueq: ; CHECK-CVT-GI: // %bb.0: @@ -1016,15 +1016,6 @@ define <8 x i1> @test_fcmp_ueq(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: 
test_fcmp_ueq: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: fcmgt v2.8h, v0.8h, v1.8h -; CHECK-FP16-GI-NEXT: fcmgt v0.8h, v1.8h, v0.8h -; CHECK-FP16-GI-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-FP16-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-FP16-GI-NEXT: xtn v0.8b, v0.8h -; CHECK-FP16-GI-NEXT: ret %1 = fcmp ueq <8 x half> %a, %b ret <8 x i1> %1 } @@ -1199,14 +1190,14 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-CVT-SD-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-SD-NEXT: ret ; -; CHECK-FP16-SD-LABEL: test_fcmp_uno: -; CHECK-FP16-SD: // %bb.0: -; CHECK-FP16-SD-NEXT: fcmgt v2.8h, v1.8h, v0.8h -; CHECK-FP16-SD-NEXT: fcmge v0.8h, v0.8h, v1.8h -; CHECK-FP16-SD-NEXT: mvn v1.16b, v2.16b -; CHECK-FP16-SD-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-FP16-SD-NEXT: xtn v0.8b, v0.8h -; CHECK-FP16-SD-NEXT: ret +; CHECK-FP16-LABEL: test_fcmp_uno: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmge v2.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: fcmgt v0.8h, v1.8h, v0.8h +; CHECK-FP16-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-FP16-NEXT: mvn v0.16b, v0.16b +; CHECK-FP16-NEXT: xtn v0.8b, v0.8h +; CHECK-FP16-NEXT: ret ; ; CHECK-CVT-GI-LABEL: test_fcmp_uno: ; CHECK-CVT-GI: // %bb.0: @@ -1225,15 +1216,6 @@ define <8 x i1> @test_fcmp_uno(<8 x half> %a, <8 x half> %b) #0 { ; CHECK-CVT-GI-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-CVT-GI-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-GI-NEXT: ret -; -; CHECK-FP16-GI-LABEL: test_fcmp_uno: -; CHECK-FP16-GI: // %bb.0: -; CHECK-FP16-GI-NEXT: fcmge v2.8h, v0.8h, v1.8h -; CHECK-FP16-GI-NEXT: fcmgt v0.8h, v1.8h, v0.8h -; CHECK-FP16-GI-NEXT: orr v0.16b, v0.16b, v2.16b -; CHECK-FP16-GI-NEXT: mvn v0.16b, v0.16b -; CHECK-FP16-GI-NEXT: xtn v0.8b, v0.8h -; CHECK-FP16-GI-NEXT: ret %1 = fcmp uno <8 x half> %a, %b ret <8 x i1> %1 } diff --git a/llvm/test/CodeGen/AArch64/sve2-bsl.ll b/llvm/test/CodeGen/AArch64/sve2-bsl.ll index 80293388a5cf9..6cfe66eb8e633 100644 --- a/llvm/test/CodeGen/AArch64/sve2-bsl.ll +++ b/llvm/test/CodeGen/AArch64/sve2-bsl.ll @@ -46,9 +46,7 @@ define <vscale x 16 x i8> @nbsl_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) ; CHECK-LABEL: nbsl_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.b, #127 // =0x7f -; CHECK-NEXT: and z1.b, z1.b, #0x80 -; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d -; CHECK-NEXT: bic z0.d, z2.d, z1.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %1 = and <vscale x 16 x i8> %a, splat(i8 127) %2 = and <vscale x 16 x i8> %b, splat(i8 -128) @@ -61,9 +59,7 @@ define <vscale x 8 x i16> @nbsl_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b ; CHECK-LABEL: nbsl_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.h, #32767 // =0x7fff -; CHECK-NEXT: and z1.h, z1.h, #0x8000 -; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d -; CHECK-NEXT: bic z0.d, z2.d, z1.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %1 = and <vscale x 8 x i16> %a, splat(i16 32767) %2 = and <vscale x 8 x i16> %b, splat(i16 -32768) @@ -76,9 +72,7 @@ define <vscale x 4 x i32> @nbsl_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b ; CHECK-LABEL: nbsl_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.s, #0x7fffffff -; CHECK-NEXT: and z1.s, z1.s, #0x80000000 -; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d -; CHECK-NEXT: bic z0.d, z2.d, z1.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %1 = and <vscale x 4 x i32> %a, splat(i32 2147483647) %2 = and <vscale x 4 x i32> %b, splat(i32 -2147483648) @@ -91,9 +85,7 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b ; CHECK-LABEL: nbsl_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff -; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000 -; CHECK-NEXT: nbsl z2.d, z2.d, z0.d, z0.d -; CHECK-NEXT: bic z0.d, z2.d, z1.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %1 = and <vscale x 2 x i64> %a, splat(i64 9223372036854775807) %2 = and <vscale x 2 x i64> %b, splat(i64 -9223372036854775808) @@ -123,9 +115,7 @@ define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, 
define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) { ; CHECK-LABEL: codegen_nbsl_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: bic z1.d, z1.d, z2.d -; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d -; CHECK-NEXT: bic z0.d, z0.d, z1.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %4 = and <vscale x 16 x i8> %2, %0 %5 = xor <vscale x 16 x i8> %2, splat (i8 -1) @@ -175,9 +165,7 @@ define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) { ; CHECK-LABEL: codegen_nbsl_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: bic z1.d, z1.d, z2.d -; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d -; CHECK-NEXT: bic z0.d, z0.d, z1.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %4 = and <vscale x 8 x i16> %2, %0 %5 = xor <vscale x 8 x i16> %2, splat (i16 -1) @@ -227,9 +215,7 @@ define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) { ; CHECK-LABEL: codegen_nbsl_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: bic z1.d, z1.d, z2.d -; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d -; CHECK-NEXT: bic z0.d, z0.d, z1.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %4 = and <vscale x 4 x i32> %2, %0 %5 = xor <vscale x 4 x i32> %2, splat (i32 -1) @@ -279,9 +265,7 @@ define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) { ; CHECK-LABEL: codegen_nbsl_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: bic z1.d, z1.d, z2.d -; CHECK-NEXT: nbsl z0.d, z0.d, z2.d, z2.d -; CHECK-NEXT: bic z0.d, z0.d, z1.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d ; CHECK-NEXT: ret %4 = and <vscale x 2 x i64> %2, %0 %5 = xor <vscale x 2 x i64> %2, splat (i64 -1) @@ -357,9 +341,7 @@ define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 { ; CHECK-LABEL: nor: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff -; CHECK-NEXT: eor z1.d, z1.d, z2.d -; CHECK-NEXT: bic z0.d, z1.d, z0.d +; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z0.d ; CHECK-NEXT: ret %3 = or <vscale x 2 x i64> %1, %0 %4 = xor <vscale x 2 x i64> %3, splat (i64 -1) diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll index 78644691fb646..c20d319f2ac79 100644 --- a/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-fcmp-spe.ll @@ -113,12 +113,14 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 { define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; SPE-LABEL: test_f32_ueq_s: ; SPE: # %bb.0: -; SPE-NEXT: efscmpgt cr0, r5, r6 -; SPE-NEXT: bc 12, gt, .LBB7_2 -; SPE-NEXT: # %bb.1: ; SPE-NEXT: efscmplt cr0, r5, r6 -; SPE-NEXT: bclr 4, gt, 0 -; SPE-NEXT: .LBB7_2: +; SPE-NEXT: bc 12, gt, .LBB7_3 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: efscmpgt cr0, r5, r6 +; SPE-NEXT: bc 12, gt, .LBB7_3 +; SPE-NEXT: # %bb.2: +; SPE-NEXT: mr r4, r3 +; SPE-NEXT: .LBB7_3: ; SPE-NEXT: mr r3, r4 ; SPE-NEXT: blr %cond = call i1 @llvm.experimental.constrained.fcmps.f32(float %f1, float %f2, metadata !"ueq", metadata !"fpexcept.strict") #0 @@ -353,12 +355,14 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; SPE: # %bb.0: ; SPE-NEXT: evmergelo r7, r7, r8 ; SPE-NEXT: evmergelo r5, r5, r6 -; SPE-NEXT: efdcmpgt cr0, r5, r7 -; SPE-NEXT: bc 12, gt, .LBB21_2 -; SPE-NEXT: # %bb.1: ; SPE-NEXT: efdcmplt cr0, r5, r7 -; SPE-NEXT: bclr 4, gt, 0 -; SPE-NEXT: .LBB21_2: +; SPE-NEXT: bc 12, gt, .LBB21_3 +; SPE-NEXT: # %bb.1: +; SPE-NEXT: efdcmpgt cr0, r5, r7 +; SPE-NEXT: bc 12, gt, .LBB21_3 +; SPE-NEXT: # %bb.2: +; SPE-NEXT: mr r4, r3 +; SPE-NEXT: .LBB21_3: ; SPE-NEXT: mr r3, r4 ; SPE-NEXT: blr %cond = call i1 @llvm.experimental.constrained.fcmps.f64(double %f1, double %f2, metadata !"ueq", metadata !"fpexcept.strict") #0 diff --git a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll index 872a08c20eae8..310f0a66aa9b9 100644 
--- a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll +++ b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll @@ -28,7 +28,8 @@ define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind { define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: test_vorc: ; CHECK: # %bb.0: -; CHECK-NEXT: vor 2, 3, 2 +; CHECK-NEXT: vorc 3, 2, 3 +; CHECK-NEXT: vorc 2, 2, 3 ; CHECK-NEXT: blr %tmp1 = xor <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1> %tmp2 = or <4 x i32> %x, %tmp1 diff --git a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll index ba5c9edb3897d..e391228fc95a9 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-eqv-nor-or-xor.ll @@ -51,8 +51,7 @@ entry: define dso_local <16 x i8> @norA_andB_C(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 { ; CHECK-LABEL: norA_andB_C: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlnor vs0, v2, v2 -; CHECK-NEXT: xxeval v2, vs0, v3, v4, 14 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 224 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -104,8 +103,7 @@ entry: define dso_local <4 x i32> @norA_xorB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { ; CHECK-LABEL: norA_xorB_C: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlnor vs0, v2, v2 -; CHECK-NEXT: xxeval v2, vs0, v3, v4, 9 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 144 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -171,9 +169,7 @@ entry: define dso_local <4 x i32> @orA_norB_C(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { ; CHECK-LABEL: orA_norB_C: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxlnor vs1, v3, v3 -; CHECK-NEXT: xxeval v2, v2, vs1, vs0, 31 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 143 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll index f98edc21bf2ea..b41220b01373a 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-and.ll @@ -80,11 +80,9 @@ define <4 x i32> @ternary_A_nor_BC_and_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_and_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -99,13 +97,10 @@ define <2 x i64> @ternary_A_nor_BC_and_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nor_BC_and_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxland vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -120,12 +115,9 @@ define <16 x i8> @ternary_A_nor_BC_and_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_and_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxland vs1, v3, v4 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; 
CHECK-NEXT: xxeval v2, v2, v3, v4, 24 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -140,12 +132,9 @@ define <8 x i16> @ternary_A_nor_BC_and_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_and_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxland vs1, v3, v4 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 24 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll index 0baa420b79761..a51e392279d55 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-b.ll @@ -77,9 +77,8 @@ define <4 x i32> @ternary_A_nor_BC_B_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -93,12 +92,10 @@ define <2 x i64> @ternary_A_nor_BC_B_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_nor_BC_B_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -112,11 +109,9 @@ define <16 x i8> @ternary_A_nor_BC_B_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nor_BC_B_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -130,11 +125,9 @@ define <8 x i16> @ternary_A_nor_BC_B_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_nor_BC_B_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v3, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 56 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll index 6fc822d729457..54bf6c03f8c1a 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-c.ll @@ -77,9 +77,8 @@ define <4 x i32> @ternary_A_nor_BC_C_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -93,12 +92,10 @@ define <2 x i64> @ternary_A_nor_BC_C_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_nor_BC_C_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; 
CHECK-NEXT: xxleqv vs0, vs0, vs0 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -112,11 +109,9 @@ define <16 x i8> @ternary_A_nor_BC_C_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_nor_BC_C_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -130,11 +125,9 @@ define <8 x i16> @ternary_A_nor_BC_C_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_nor_BC_C_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, v4, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 88 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll index 78ae36cc0ecf7..ba7680b27cc17 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-eqv.ll @@ -84,11 +84,9 @@ define <4 x i32> @ternary_A_nor_BC_eqv_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -104,13 +102,10 @@ define <2 x i64> @ternary_A_nor_BC_eqv_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxleqv vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -126,12 +121,9 @@ define <16 x i8> @ternary_A_nor_BC_eqv_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxleqv vs1, v3, v4 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -147,12 +139,9 @@ define <8 x i16> @ternary_A_nor_BC_eqv_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_eqv_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxleqv vs1, v3, v4 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 152 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C diff 
--git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll index 90928e668afd8..369587454a7c1 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-nor.ll @@ -15,11 +15,9 @@ define <4 x i32> @ternary_A_and_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -34,13 +32,10 @@ define <2 x i64> @ternary_A_and_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxland vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -55,12 +50,9 @@ define <16 x i8> @ternary_A_and_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_and_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -75,12 +67,9 @@ define <8 x i16> @ternary_A_and_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_and_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxland vs0, v3, v4 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 129 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C @@ -96,9 +85,8 @@ define <4 x i32> @ternary_A_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -112,12 +100,10 @@ define <2 x i64> @ternary_A_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_B_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -131,11 +117,9 @@ define <16 x i8> @ternary_A_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_B_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; 
CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -149,11 +133,9 @@ define <8 x i16> @ternary_A_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_B_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v3, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 131 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -168,9 +150,8 @@ define <4 x i32> @ternary_A_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i32> % ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -184,12 +165,10 @@ define <2 x i64> @ternary_A_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i64> % ; CHECK-LABEL: ternary_A_C_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -203,11 +182,9 @@ define <16 x i8> @ternary_A_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x i8> ; CHECK-LABEL: ternary_A_C_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -221,11 +198,9 @@ define <8 x i16> @ternary_A_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i16> % ; CHECK-LABEL: ternary_A_C_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, v4, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 133 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C @@ -239,11 +214,9 @@ define <4 x i32> @ternary_A_xor_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %C @@ -258,13 +231,10 @@ define <2 x i64> @ternary_A_xor_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlxor vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <2 x i64> %B, %C @@ -279,12 +249,9 @@ define <16 x i8> @ternary_A_xor_BC_nor_BC_16x8(<16 x i1> %A, <16 x 
i8> %B, <16 x ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <16 x i8> %B, %C @@ -299,12 +266,9 @@ define <8 x i16> @ternary_A_xor_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_xor_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlxor vs0, v3, v4 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 134 ; CHECK-NEXT: blr entry: %xor = xor <8 x i16> %B, %C @@ -319,11 +283,9 @@ define <4 x i32> @ternary_A_not_C_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_C_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %C, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not operation @@ -338,13 +300,10 @@ define <2 x i64> @ternary_A_not_C_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_C_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlnor vs0, v4, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %C, <i64 -1, i64 -1> ; Vector not operation @@ -359,12 +318,9 @@ define <16 x i8> @ternary_A_not_C_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_C_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxlnor vs1, v4, v4 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, vs1, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %C, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> ; Vector not operation @@ -379,12 +335,9 @@ define <8 x i16> @ternary_A_not_C_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_C_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlnor vs0, v4, v4 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 138 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %C, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> ; Vector not operation @@ -399,11 +352,9 @@ define <4 x i32> @ternary_A_not_B_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i3 ; CHECK-LABEL: ternary_A_not_B_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnor vs0, v3, v3 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <4 x i32> %B, <i32 -1, i32 -1, i32 -1, i32 -1> ; Vector not
operation @@ -418,13 +369,10 @@ define <2 x i64> @ternary_A_not_B_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i6 ; CHECK-LABEL: ternary_A_not_B_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlnor vs0, v3, v3 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <2 x i64> %B, ; Vector not operation @@ -439,12 +387,9 @@ define <16 x i8> @ternary_A_not_B_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_not_B_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxlnor vs1, v3, v3 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs0, vs1, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <16 x i8> %B, ; Vector not operation @@ -459,12 +404,9 @@ define <8 x i16> @ternary_A_not_B_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i1 ; CHECK-LABEL: ternary_A_not_B_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlnor vs0, v3, v3 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 140 ; CHECK-NEXT: blr entry: %not = xor <8 x i16> %B, ; Vector not operation @@ -479,11 +421,9 @@ define <4 x i32> @ternary_A_nand_BC_nor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs1, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %C @@ -499,13 +439,10 @@ define <2 x i64> @ternary_A_nand_BC_nor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlnand vs0, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %C @@ -521,12 +458,9 @@ define <16 x i8> @ternary_A_nand_BC_nor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %C @@ -542,12 +476,9 @@ define <8 x i16> @ternary_A_nand_BC_nor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x ; CHECK-LABEL: ternary_A_nand_BC_nor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs1, vs1, vs1 -; CHECK-NEXT: xxlnand vs0, v3, v4 -; CHECK-NEXT: xxeval vs1, v3, v4, vs1, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah 
v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 142 ; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll index 5031ebc930e11..0fc296cc5a4e2 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-vselect-x-xor.ll @@ -267,11 +267,9 @@ define <4 x i32> @ternary_A_nor_BC_xor_BC_4x32(<4 x i1> %A, <4 x i32> %B, <4 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_4x32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxleqv v5, v5, v5 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: vslw v2, v2, v5 -; CHECK-NEXT: xxeval vs0, v3, v4, v5, 96 ; CHECK-NEXT: vsraw v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 ; CHECK-NEXT: blr entry: %or = or <4 x i32> %B, %C @@ -286,13 +284,10 @@ define <2 x i64> @ternary_A_nor_BC_xor_BC_2x64(<2 x i1> %A, <2 x i64> %B, <2 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_2x64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxlxor v5, v5, v5 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxlxor vs1, v3, v4 ; CHECK-NEXT: xxsplti32dx v5, 1, 63 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vsld v2, v2, v5 ; CHECK-NEXT: vsrad v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 ; CHECK-NEXT: blr entry: %or = or <2 x i64> %B, %C @@ -307,12 +302,9 @@ define <16 x i8> @ternary_A_nor_BC_xor_BC_16x8(<16 x i1> %A, <16 x i8> %B, <16 x ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_16x8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltib v5, 7 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxlxor vs1, v3, v4 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslb v2, v2, v5 ; CHECK-NEXT: vsrab v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 ; CHECK-NEXT: blr entry: %or = or <16 x i8> %B, %C @@ -327,12 +319,9 @@ define <8 x i16> @ternary_A_nor_BC_xor_BC_8x16(<8 x i1> %A, <8 x i16> %B, <8 x i ; CHECK-LABEL: ternary_A_nor_BC_xor_BC_8x16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xxspltiw v5, 983055 -; CHECK-NEXT: xxleqv vs0, vs0, vs0 -; CHECK-NEXT: xxlxor vs1, v3, v4 -; CHECK-NEXT: xxeval vs0, v3, v4, vs0, 96 ; CHECK-NEXT: vslh v2, v2, v5 ; CHECK-NEXT: vsrah v2, v2, v5 -; CHECK-NEXT: xxsel v2, vs1, vs0, v2 +; CHECK-NEXT: xxeval v2, v2, v3, v4, 104 ; CHECK-NEXT: blr entry: %or = or <8 x i16> %B, %C diff --git a/llvm/test/CodeGen/X86/abds-vector-128.ll b/llvm/test/CodeGen/X86/abds-vector-128.ll index bc57a31f063b5..148be83892b72 100644 --- a/llvm/test/CodeGen/X86/abds-vector-128.ll +++ b/llvm/test/CodeGen/X86/abds-vector-128.ll @@ -756,9 +756,9 @@ define <2 x i64> @abd_cmp_v2i64_multiuse_cmp(<2 x i64> %a, <2 x i64> %b) nounwin ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2] ; SSE2-NEXT: pand %xmm6, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: paddq %xmm4, %xmm0 ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll index 37df42ea2682d..3fcfb9d278da7 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-bit-manip.ll @@ -714,19 +714,18 @@ define <64 x i8> @tzmsk_v64i8(<64 x i8> %a0, <64 x i8> %a1) { ; AVX512F-NEXT: vpmovmskb %ymm4, %ecx ; 
AVX512F-NEXT: shlq $32, %rcx ; AVX512F-NEXT: leaq (%rax,%rcx), %rdx -; AVX512F-NEXT: addq $-1, %rdx -; AVX512F-NEXT: notq %rcx -; AVX512F-NEXT: andnq %rcx, %rax, %rax -; AVX512F-NEXT: andq %rax, %rdx -; AVX512F-NEXT: movq %rdx, %rax -; AVX512F-NEXT: movl %edx, %ecx -; AVX512F-NEXT: kmovw %edx, %k1 -; AVX512F-NEXT: shrq $32, %rdx -; AVX512F-NEXT: shrq $48, %rax -; AVX512F-NEXT: shrl $16, %ecx -; AVX512F-NEXT: kmovw %ecx, %k2 -; AVX512F-NEXT: kmovw %eax, %k3 -; AVX512F-NEXT: kmovw %edx, %k4 +; AVX512F-NEXT: addq %rcx, %rax +; AVX512F-NEXT: addq $-1, %rax +; AVX512F-NEXT: andnq %rax, %rdx, %rax +; AVX512F-NEXT: movq %rax, %rcx +; AVX512F-NEXT: movl %eax, %edx +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: shrq $32, %rax +; AVX512F-NEXT: shrq $48, %rcx +; AVX512F-NEXT: shrl $16, %edx +; AVX512F-NEXT: kmovw %edx, %k2 +; AVX512F-NEXT: kmovw %ecx, %k3 +; AVX512F-NEXT: kmovw %eax, %k4 ; AVX512F-NEXT: vpaddb %ymm2, %ymm3, %ymm2 ; AVX512F-NEXT: vpaddb %ymm1, %ymm0, %ymm1 ; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll index 8d5bbb4ae8e1e..8c91274abf3dd 100644 --- a/llvm/test/CodeGen/X86/combine-or.ll +++ b/llvm/test/CodeGen/X86/combine-or.ll @@ -183,32 +183,14 @@ define i32 @or_and_multiuse_and_multiuse_i32(i32 %x, i32 %y) nounwind { } define i64 @or_build_pair_not(i32 %a0, i32 %a1) { -; SSE-LABEL: or_build_pair_not: -; SSE: # %bb.0: -; SSE-NEXT: # kill: def $esi killed $esi def $rsi -; SSE-NEXT: shlq $32, %rsi -; SSE-NEXT: movl %edi, %eax -; SSE-NEXT: orq %rsi, %rax -; SSE-NEXT: notq %rax -; SSE-NEXT: retq -; -; AVX1-LABEL: or_build_pair_not: -; AVX1: # %bb.0: -; AVX1-NEXT: # kill: def $esi killed $esi def $rsi -; AVX1-NEXT: shlq $32, %rsi -; AVX1-NEXT: movl %edi, %eax -; AVX1-NEXT: orq %rsi, %rax -; AVX1-NEXT: notq %rax -; AVX1-NEXT: retq -; -; AVX2-LABEL: or_build_pair_not: -; AVX2: # %bb.0: -; AVX2-NEXT: # kill: def $esi killed $esi def $rsi -; AVX2-NEXT: shlq $32, %rsi -; AVX2-NEXT: notq %rsi -; AVX2-NEXT: movl %edi, %eax -; AVX2-NEXT: andnq %rsi, %rax, %rax -; AVX2-NEXT: retq +; CHECK-LABEL: or_build_pair_not: +; CHECK: # %bb.0: +; CHECK-NEXT: # kill: def $esi killed $esi def $rsi +; CHECK-NEXT: shlq $32, %rsi +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: orq %rsi, %rax +; CHECK-NEXT: notq %rax +; CHECK-NEXT: retq %n0 = xor i32 %a0, -1 %n1 = xor i32 %a1, -1 %x0 = zext i32 %n0 to i64 @@ -280,9 +262,10 @@ define i64 @PR89533(<64 x i8> %a0) { ; AVX2-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm0 ; AVX2-NEXT: vpmovmskb %ymm0, %ecx ; AVX2-NEXT: shlq $32, %rcx +; AVX2-NEXT: orq %rax, %rcx ; AVX2-NEXT: notq %rcx -; AVX2-NEXT: andnq %rcx, %rax, %rax -; AVX2-NEXT: tzcntq %rax, %rax +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: tzcntq %rcx, %rax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq %cmp = icmp ne <64 x i8> %a0, diff --git a/llvm/test/CodeGen/X86/combine-srl.ll b/llvm/test/CodeGen/X86/combine-srl.ll index 4e31177023b08..21657bf67f233 100644 --- a/llvm/test/CodeGen/X86/combine-srl.ll +++ b/llvm/test/CodeGen/X86/combine-srl.ll @@ -437,12 +437,13 @@ define <4 x i32> @combine_vec_lshr_lzcnt_bit1(<4 x i32> %x) { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrld $8, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: psrld $16, %xmm0 +; 
SSE2-NEXT: pandn %xmm3, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 diff --git a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll index 7919495821efd..905d1648564fb 100644 --- a/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll +++ b/llvm/test/CodeGen/X86/expand-vp-int-intrinsics.ll @@ -1487,12 +1487,13 @@ define <4 x i32> @vp_ctlz_v4i32(<4 x i32> %va, <4 x i1> %m, i32 zeroext %evl) { ; SSE-NEXT: por %xmm1, %xmm0 ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: psrld $8, %xmm1 -; SSE-NEXT: por %xmm1, %xmm0 -; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: psrld $16, %xmm1 ; SSE-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE-NEXT: pxor %xmm1, %xmm2 -; SSE-NEXT: pandn %xmm2, %xmm0 +; SSE-NEXT: movdqa %xmm0, %xmm3 +; SSE-NEXT: pandn %xmm2, %xmm3 +; SSE-NEXT: por %xmm1, %xmm0 +; SSE-NEXT: psrld $16, %xmm0 +; SSE-NEXT: pandn %xmm3, %xmm0 ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: psrlw $1, %xmm1 ; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll index 478d80e9827a5..badfd1af940ca 100644 --- a/llvm/test/CodeGen/X86/ispow2.ll +++ b/llvm/test/CodeGen/X86/ispow2.ll @@ -179,23 +179,19 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) { ; CHECK-NOBMI-NEXT: pxor %xmm4, %xmm1 ; CHECK-NOBMI-NEXT: movdqa %xmm1, %xmm6 ; CHECK-NOBMI-NEXT: pcmpgtd %xmm4, %xmm6 -; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] -; CHECK-NOBMI-NEXT: pand %xmm6, %xmm1 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm4 = xmm6[1,1,3,3] -; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm4 -; CHECK-NOBMI-NEXT: pandn %xmm4, %xmm1 ; CHECK-NOBMI-NEXT: pxor %xmm5, %xmm3 ; CHECK-NOBMI-NEXT: pxor %xmm3, %xmm0 -; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm4 -; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm4 +; CHECK-NOBMI-NEXT: movdqa %xmm0, %xmm5 +; CHECK-NOBMI-NEXT: pcmpgtd %xmm3, %xmm5 +; CHECK-NOBMI-NEXT: movdqa %xmm5, %xmm7 +; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm7 = xmm7[0,2],xmm6[0,2] +; CHECK-NOBMI-NEXT: pcmpeqd %xmm4, %xmm1 ; CHECK-NOBMI-NEXT: pcmpeqd %xmm3, %xmm0 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-NOBMI-NEXT: pand %xmm4, %xmm0 -; CHECK-NOBMI-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3] -; CHECK-NOBMI-NEXT: pxor %xmm2, %xmm3 -; CHECK-NOBMI-NEXT: pandn %xmm3, %xmm0 -; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,3],xmm1[1,3] +; CHECK-NOBMI-NEXT: andps %xmm7, %xmm0 +; CHECK-NOBMI-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,3],xmm6[1,3] +; CHECK-NOBMI-NEXT: orps %xmm5, %xmm0 +; CHECK-NOBMI-NEXT: xorps %xmm2, %xmm0 ; CHECK-NOBMI-NEXT: retq ; ; CHECK-AVX2-LABEL: neither_pow2_non_zero_4xv64_x_maybe_z: diff --git a/llvm/test/CodeGen/X86/machine-cp.ll b/llvm/test/CodeGen/X86/machine-cp.ll index 0713f0bbe244c..c84a1159ad56a 100644 --- a/llvm/test/CodeGen/X86/machine-cp.ll +++ b/llvm/test/CodeGen/X86/machine-cp.ll @@ -100,38 +100,55 @@ define <16 x float> @foo(<16 x float> %x) { ; CHECK-LABEL: foo: ; CHECK: ## %bb.0: ## %bb ; CHECK-NEXT: xorps %xmm5, %xmm5 -; CHECK-NEXT: cvttps2dq %xmm3, %xmm6 +; CHECK-NEXT: cvttps2dq %xmm3, %xmm8 ; CHECK-NEXT: movaps %xmm3, %xmm4 ; CHECK-NEXT: cmpltps %xmm5, %xmm4 -; CHECK-NEXT: cvttps2dq %xmm2, %xmm3 +; CHECK-NEXT: movaps {{.*#+}} xmm7 = [13,14,15,16] +; CHECK-NEXT: movaps %xmm4, %xmm6 +; CHECK-NEXT: orps %xmm7, %xmm6 +; CHECK-NEXT: cvtdq2ps %xmm8, %xmm3 +; CHECK-NEXT: andps %xmm7, %xmm3 +; 
CHECK-NEXT: andps %xmm6, %xmm3 +; CHECK-NEXT: andnps %xmm4, %xmm6 +; CHECK-NEXT: cvttps2dq %xmm2, %xmm4 ; CHECK-NEXT: movaps %xmm2, %xmm7 ; CHECK-NEXT: cmpltps %xmm5, %xmm7 -; CHECK-NEXT: cvttps2dq %xmm1, %xmm2 +; CHECK-NEXT: movaps {{.*#+}} xmm8 = [9,10,11,12] +; CHECK-NEXT: movaps %xmm7, %xmm9 +; CHECK-NEXT: orps %xmm8, %xmm9 +; CHECK-NEXT: cvtdq2ps %xmm4, %xmm2 +; CHECK-NEXT: andps %xmm8, %xmm2 +; CHECK-NEXT: andps %xmm9, %xmm2 +; CHECK-NEXT: andnps %xmm7, %xmm9 +; CHECK-NEXT: cvttps2dq %xmm1, %xmm4 +; CHECK-NEXT: cmpltps %xmm5, %xmm1 +; CHECK-NEXT: movaps {{.*#+}} xmm7 = [5,6,7,8] ; CHECK-NEXT: movaps %xmm1, %xmm8 -; CHECK-NEXT: cmpltps %xmm5, %xmm8 +; CHECK-NEXT: orps %xmm7, %xmm8 +; CHECK-NEXT: cvtdq2ps %xmm4, %xmm4 +; CHECK-NEXT: andps %xmm7, %xmm4 +; CHECK-NEXT: andps %xmm8, %xmm4 +; CHECK-NEXT: andnps %xmm1, %xmm8 ; CHECK-NEXT: cvttps2dq %xmm0, %xmm1 -; CHECK-NEXT: movaps %xmm0, %xmm9 -; CHECK-NEXT: cmpltps %xmm5, %xmm9 +; CHECK-NEXT: cmpltps %xmm5, %xmm0 ; CHECK-NEXT: movaps {{.*#+}} xmm5 = [1,2,3,4] -; CHECK-NEXT: orps %xmm5, %xmm9 -; CHECK-NEXT: movaps {{.*#+}} xmm10 = [5,6,7,8] -; CHECK-NEXT: orps %xmm10, %xmm8 -; CHECK-NEXT: movaps {{.*#+}} xmm11 = [9,10,11,12] -; CHECK-NEXT: orps %xmm11, %xmm7 -; CHECK-NEXT: movaps {{.*#+}} xmm12 = [13,14,15,16] -; CHECK-NEXT: orps %xmm12, %xmm4 -; CHECK-NEXT: cvtdq2ps %xmm1, %xmm0 -; CHECK-NEXT: cvtdq2ps %xmm2, %xmm1 -; CHECK-NEXT: cvtdq2ps %xmm3, %xmm2 -; CHECK-NEXT: cvtdq2ps %xmm6, %xmm3 -; CHECK-NEXT: andps %xmm5, %xmm0 -; CHECK-NEXT: andps %xmm9, %xmm0 -; CHECK-NEXT: andps %xmm10, %xmm1 -; CHECK-NEXT: andps %xmm8, %xmm1 -; CHECK-NEXT: andps %xmm11, %xmm2 -; CHECK-NEXT: andps %xmm7, %xmm2 -; CHECK-NEXT: andps %xmm12, %xmm3 -; CHECK-NEXT: andps %xmm4, %xmm3 +; CHECK-NEXT: movaps %xmm0, %xmm7 +; CHECK-NEXT: orps %xmm5, %xmm7 +; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1 +; CHECK-NEXT: andps %xmm5, %xmm1 +; CHECK-NEXT: andps %xmm7, %xmm1 +; CHECK-NEXT: andnps %xmm0, %xmm7 +; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1] +; CHECK-NEXT: andps %xmm0, %xmm7 +; CHECK-NEXT: orps %xmm7, %xmm1 +; CHECK-NEXT: andps %xmm0, %xmm8 +; CHECK-NEXT: orps %xmm8, %xmm4 +; CHECK-NEXT: andps %xmm0, %xmm9 +; CHECK-NEXT: orps %xmm9, %xmm2 +; CHECK-NEXT: andps %xmm0, %xmm6 +; CHECK-NEXT: orps %xmm6, %xmm3 +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: movaps %xmm4, %xmm1 ; CHECK-NEXT: retq bb: %v3 = icmp slt <16 x i32> , zeroinitializer diff --git a/llvm/test/CodeGen/X86/promote-cmp.ll b/llvm/test/CodeGen/X86/promote-cmp.ll index aeb8fe93930a0..88934a382bbfa 100644 --- a/llvm/test/CodeGen/X86/promote-cmp.ll +++ b/llvm/test/CodeGen/X86/promote-cmp.ll @@ -8,36 +8,34 @@ define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) { ; SSE2-LABEL: PR45808: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648] -; SSE2-NEXT: movdqa %xmm3, %xmm6 +; SSE2-NEXT: movdqa %xmm3, %xmm5 +; SSE2-NEXT: pxor %xmm4, %xmm5 +; SSE2-NEXT: movdqa %xmm1, %xmm6 ; SSE2-NEXT: pxor %xmm4, %xmm6 -; SSE2-NEXT: movdqa %xmm1, %xmm7 -; SSE2-NEXT: pxor %xmm4, %xmm7 -; SSE2-NEXT: movdqa %xmm7, %xmm5 -; SSE2-NEXT: pcmpgtd %xmm6, %xmm5 +; SSE2-NEXT: movdqa %xmm6, %xmm7 +; SSE2-NEXT: pcmpgtd %xmm5, %xmm7 ; SSE2-NEXT: movdqa %xmm2, %xmm8 ; SSE2-NEXT: pxor %xmm4, %xmm8 ; SSE2-NEXT: pxor %xmm0, %xmm4 ; SSE2-NEXT: movdqa %xmm4, %xmm9 ; SSE2-NEXT: pcmpgtd %xmm8, %xmm9 ; SSE2-NEXT: movdqa %xmm9, %xmm10 -; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm5[0,2] -; SSE2-NEXT: pcmpeqd %xmm6, %xmm7 +; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm7[0,2] +; SSE2-NEXT: pcmpeqd %xmm5, %xmm6 ; 
SSE2-NEXT: pcmpeqd %xmm8, %xmm4 -; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm7[1,3] +; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm6[1,3] ; SSE2-NEXT: andps %xmm10, %xmm4 -; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[1,3],xmm5[1,3] -; SSE2-NEXT: pcmpeqd %xmm6, %xmm6 -; SSE2-NEXT: pxor %xmm9, %xmm6 -; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm4[2,2,3,3] -; SSE2-NEXT: pandn %xmm6, %xmm4 -; SSE2-NEXT: pxor %xmm6, %xmm6 -; SSE2-NEXT: pcmpgtd %xmm4, %xmm6 -; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm6[0],xmm4[1],xmm6[1] +; SSE2-NEXT: shufps {{.*#+}} xmm9 = xmm9[1,3],xmm7[1,3] +; SSE2-NEXT: orps %xmm4, %xmm9 +; SSE2-NEXT: pcmpeqd %xmm4, %xmm4 +; SSE2-NEXT: pxor %xmm9, %xmm4 +; SSE2-NEXT: pxor %xmm5, %xmm5 +; SSE2-NEXT: pcmpgtd %xmm4, %xmm5 +; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1] ; SSE2-NEXT: pand %xmm4, %xmm0 ; SSE2-NEXT: pandn %xmm2, %xmm4 ; SSE2-NEXT: por %xmm4, %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm5[1,1,3,3] -; SSE2-NEXT: por %xmm7, %xmm2 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm9[2,2,3,3] ; SSE2-NEXT: pslld $31, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 ; SSE2-NEXT: pand %xmm2, %xmm1 diff --git a/llvm/test/CodeGen/X86/setcc-combine.ll b/llvm/test/CodeGen/X86/setcc-combine.ll index d97e603c636af..f526db00df606 100644 --- a/llvm/test/CodeGen/X86/setcc-combine.ll +++ b/llvm/test/CodeGen/X86/setcc-combine.ll @@ -1020,9 +1020,9 @@ define <2 x i64> @cmp_uge_not_with_vec2xi64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; CHECK-NEXT: pand %xmm3, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pxor %xmm1, %xmm2 -; CHECK-NEXT: pandn %xmm2, %xmm0 +; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retq %na = xor <2 x i64> %a, %nb = xor <2 x i64> %b, diff --git a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll index 6e68b37bec98a..84856aab85079 100644 --- a/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-vec-tautological.ll @@ -198,9 +198,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm1 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-SSE2-NEXT: pxor %xmm0, %xmm2 -; CHECK-SSE2-NEXT: pandn %xmm2, %xmm1 +; CHECK-SSE2-NEXT: por %xmm1, %xmm0 +; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-SSE2-NEXT: pxor %xmm0, %xmm1 ; CHECK-SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero ; CHECK-SSE2-NEXT: retq ; @@ -223,9 +223,9 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; CHECK-SSE41-NEXT: pand %xmm2, %xmm1 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; CHECK-SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-SSE41-NEXT: pxor %xmm0, %xmm2 -; CHECK-SSE41-NEXT: pandn %xmm2, %xmm1 +; CHECK-SSE41-NEXT: por %xmm1, %xmm0 +; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-SSE41-NEXT: pxor %xmm0, %xmm1 ; CHECK-SSE41-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero ; CHECK-SSE41-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll index 25ba593d47062..63e08de7fdf53 100644 --- a/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll +++ b/llvm/test/CodeGen/X86/vec_cmp_sint-128.ll @@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) 
nounwind { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: ge_v2i64: @@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm1, %xmm2 -; SSE41-NEXT: pandn %xmm2, %xmm0 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE42-LABEL: ge_v2i64: @@ -606,9 +606,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: le_v2i64: @@ -623,9 +623,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm1, %xmm2 -; SSE41-NEXT: pandn %xmm2, %xmm0 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE42-LABEL: le_v2i64: diff --git a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll index bd730e7dbefbc..9d65ff94061b0 100644 --- a/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll +++ b/llvm/test/CodeGen/X86/vec_cmp_uint-128.ll @@ -298,9 +298,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: ge_v2i64: @@ -315,9 +315,9 @@ define <2 x i64> @ge_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm1, %xmm2 -; SSE41-NEXT: pandn %xmm2, %xmm0 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE42-LABEL: ge_v2i64: @@ -722,9 +722,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: le_v2i64: @@ -739,9 +739,9 @@ define <2 x i64> @le_v2i64(<2 x i64> %a, <2 x i64> %b) nounwind { ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; 
SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm1, %xmm2 -; SSE41-NEXT: pandn %xmm2, %xmm0 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; SSE42-LABEL: le_v2i64: diff --git a/llvm/test/CodeGen/X86/vec_compare.ll b/llvm/test/CodeGen/X86/vec_compare.ll index 0fc298a2b4cd4..c1045c7b72f2c 100644 --- a/llvm/test/CodeGen/X86/vec_compare.ll +++ b/llvm/test/CodeGen/X86/vec_compare.ll @@ -128,9 +128,9 @@ define <2 x i64> @test9(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] ; CHECK-NEXT: pand %xmm3, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pxor %xmm1, %xmm2 -; CHECK-NEXT: pandn %xmm2, %xmm0 +; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retl %C = icmp sge <2 x i64> %A, %B %D = sext <2 x i1> %C to <2 x i64> @@ -150,9 +150,9 @@ define <2 x i64> @test10(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; CHECK-NEXT: pand %xmm3, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pxor %xmm1, %xmm2 -; CHECK-NEXT: pandn %xmm2, %xmm0 +; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retl %C = icmp sle <2 x i64> %A, %B %D = sext <2 x i1> %C to <2 x i64> @@ -212,9 +212,9 @@ define <2 x i64> @test13(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] ; CHECK-NEXT: pand %xmm3, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pxor %xmm1, %xmm2 -; CHECK-NEXT: pandn %xmm2, %xmm0 +; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retl %C = icmp uge <2 x i64> %A, %B %D = sext <2 x i1> %C to <2 x i64> @@ -234,9 +234,9 @@ define <2 x i64> @test14(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; CHECK-NEXT: pand %xmm3, %xmm0 ; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 -; CHECK-NEXT: pxor %xmm1, %xmm2 -; CHECK-NEXT: pandn %xmm2, %xmm0 +; CHECK-NEXT: por %xmm0, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 +; CHECK-NEXT: pxor %xmm1, %xmm0 ; CHECK-NEXT: retl %C = icmp ule <2 x i64> %A, %B %D = sext <2 x i1> %C to <2 x i64> diff --git a/llvm/test/CodeGen/X86/vec_ctbits.ll b/llvm/test/CodeGen/X86/vec_ctbits.ll index 048117dd43e66..4a3bcbb0a96a4 100644 --- a/llvm/test/CodeGen/X86/vec_ctbits.ll +++ b/llvm/test/CodeGen/X86/vec_ctbits.ll @@ -49,12 +49,13 @@ define <2 x i64> @foolz(<2 x i64> %a) nounwind { ; CHECK-NEXT: por %xmm1, %xmm0 ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrlq $16, %xmm1 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: movdqa %xmm0, %xmm1 -; CHECK-NEXT: psrlq $32, %xmm1 ; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 ; CHECK-NEXT: pxor %xmm1, %xmm2 -; CHECK-NEXT: pandn %xmm2, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm3 +; CHECK-NEXT: pandn %xmm2, %xmm3 +; CHECK-NEXT: por %xmm1, %xmm0 +; CHECK-NEXT: psrlq $32, %xmm0 +; CHECK-NEXT: pandn %xmm3, %xmm0 ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrlw $1, %xmm1 ; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -151,12 +152,13 @@ define <2 x i32> @promlz(<2 x i32> %a) nounwind { ; CHECK-NEXT: por %xmm1, %xmm0 ; 
CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrld $8, %xmm1 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: movdqa %xmm0, %xmm1 -; CHECK-NEXT: psrld $16, %xmm1 ; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 ; CHECK-NEXT: pxor %xmm1, %xmm2 -; CHECK-NEXT: pandn %xmm2, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm3 +; CHECK-NEXT: pandn %xmm2, %xmm3 +; CHECK-NEXT: por %xmm1, %xmm0 +; CHECK-NEXT: psrld $16, %xmm0 +; CHECK-NEXT: pandn %xmm3, %xmm0 ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrlw $1, %xmm1 ; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 diff --git a/llvm/test/CodeGen/X86/vec_setcc-2.ll b/llvm/test/CodeGen/X86/vec_setcc-2.ll index ade6b5c8d6bdf..5a71878ea4579 100644 --- a/llvm/test/CodeGen/X86/vec_setcc-2.ll +++ b/llvm/test/CodeGen/X86/vec_setcc-2.ll @@ -448,14 +448,13 @@ define <2 x i1> @ule_v2i64_splat(<2 x i64> %x) { ; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 -; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 -; SSE2-NEXT: pxor %xmm2, %xmm3 -; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,2,2] +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,0,2,2] ; SSE2-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: pand %xmm0, %xmm1 -; SSE2-NEXT: pandn %xmm3, %xmm1 -; SSE2-NEXT: movdqa %xmm1, %xmm0 +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE2-NEXT: por %xmm1, %xmm2 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm2, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: ule_v2i64_splat: diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll index 990113b1ecc1e..716090abf1c4a 100644 --- a/llvm/test/CodeGen/X86/vector-lzcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-lzcnt-128.ll @@ -30,12 +30,13 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlq $16, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlq $32, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: psrlq $32, %xmm0 +; SSE2-NEXT: pandn %xmm3, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -70,12 +71,13 @@ define <2 x i64> @testv2i64(<2 x i64> %in) nounwind { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlq $16, %xmm1 -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlq $32, %xmm1 ; SSE3-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE3-NEXT: pxor %xmm1, %xmm2 -; SSE3-NEXT: pandn %xmm2, %xmm0 +; SSE3-NEXT: movdqa %xmm0, %xmm3 +; SSE3-NEXT: pandn %xmm2, %xmm3 +; SSE3-NEXT: por %xmm1, %xmm0 +; SSE3-NEXT: psrlq $32, %xmm0 +; SSE3-NEXT: pandn %xmm3, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -308,12 +310,13 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlq $16, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlq $32, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pandn 
%xmm2, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: psrlq $32, %xmm0 +; SSE2-NEXT: pandn %xmm3, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -348,12 +351,13 @@ define <2 x i64> @testv2i64u(<2 x i64> %in) nounwind { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlq $16, %xmm1 -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlq $32, %xmm1 ; SSE3-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE3-NEXT: pxor %xmm1, %xmm2 -; SSE3-NEXT: pandn %xmm2, %xmm0 +; SSE3-NEXT: movdqa %xmm0, %xmm3 +; SSE3-NEXT: pandn %xmm2, %xmm3 +; SSE3-NEXT: por %xmm1, %xmm0 +; SSE3-NEXT: psrlq $32, %xmm0 +; SSE3-NEXT: pandn %xmm3, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -583,12 +587,13 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrld $8, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: psrld $16, %xmm0 +; SSE2-NEXT: pandn %xmm3, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -625,12 +630,13 @@ define <4 x i32> @testv4i32(<4 x i32> %in) nounwind { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrld $8, %xmm1 -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrld $16, %xmm1 ; SSE3-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE3-NEXT: pxor %xmm1, %xmm2 -; SSE3-NEXT: pandn %xmm2, %xmm0 +; SSE3-NEXT: movdqa %xmm0, %xmm3 +; SSE3-NEXT: pandn %xmm2, %xmm3 +; SSE3-NEXT: por %xmm1, %xmm0 +; SSE3-NEXT: psrld $16, %xmm0 +; SSE3-NEXT: pandn %xmm3, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -832,12 +838,13 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrld $8, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrld $16, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: psrld $16, %xmm0 +; SSE2-NEXT: pandn %xmm3, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -874,12 +881,13 @@ define <4 x i32> @testv4i32u(<4 x i32> %in) nounwind { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrld $8, %xmm1 -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrld $16, %xmm1 ; SSE3-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE3-NEXT: pxor %xmm1, %xmm2 -; SSE3-NEXT: pandn %xmm2, %xmm0 +; SSE3-NEXT: movdqa %xmm0, %xmm3 +; SSE3-NEXT: pandn %xmm2, %xmm3 +; SSE3-NEXT: por %xmm1, %xmm0 +; SSE3-NEXT: psrld $16, %xmm0 +; SSE3-NEXT: pandn %xmm3, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1078,12 +1086,13 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind { ; SSE2-NEXT: por %xmm1, %xmm0 ; 
SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: psrlw $8, %xmm0 +; SSE2-NEXT: pandn %xmm3, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1114,12 +1123,13 @@ define <8 x i16> @testv8i16(<8 x i16> %in) nounwind { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $8, %xmm1 ; SSE3-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE3-NEXT: pxor %xmm1, %xmm2 -; SSE3-NEXT: pandn %xmm2, %xmm0 +; SSE3-NEXT: movdqa %xmm0, %xmm3 +; SSE3-NEXT: pandn %xmm2, %xmm3 +; SSE3-NEXT: por %xmm1, %xmm0 +; SSE3-NEXT: psrlw $8, %xmm0 +; SSE3-NEXT: pandn %xmm3, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1286,12 +1296,13 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind { ; SSE2-NEXT: por %xmm1, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $8, %xmm1 ; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm3 +; SSE2-NEXT: por %xmm1, %xmm0 +; SSE2-NEXT: psrlw $8, %xmm0 +; SSE2-NEXT: pandn %xmm3, %xmm0 ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $1, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1322,12 +1333,13 @@ define <8 x i16> @testv8i16u(<8 x i16> %in) nounwind { ; SSE3-NEXT: por %xmm1, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $8, %xmm1 ; SSE3-NEXT: pcmpeqd %xmm2, %xmm2 ; SSE3-NEXT: pxor %xmm1, %xmm2 -; SSE3-NEXT: pandn %xmm2, %xmm0 +; SSE3-NEXT: movdqa %xmm0, %xmm3 +; SSE3-NEXT: pandn %xmm2, %xmm3 +; SSE3-NEXT: por %xmm1, %xmm0 +; SSE3-NEXT: psrlw $8, %xmm0 +; SSE3-NEXT: pandn %xmm3, %xmm0 ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $1, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 @@ -1493,28 +1505,29 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $2, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm3 ; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 -; SSE2-NEXT: pxor %xmm1, %xmm3 +; SSE2-NEXT: psrlw $4, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: pandn %xmm3, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrlw $1, 
%xmm2 +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: psubb %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] ; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pand %xmm2, %xmm3 ; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm3, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm1, %xmm0 ; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: paddb %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrlw $4, %xmm2 +; SSE2-NEXT: paddb %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: testv16i8: @@ -1526,28 +1539,29 @@ define <16 x i8> @testv16i8(<16 x i8> %in) nounwind { ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $2, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE3-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE3-NEXT: pxor %xmm1, %xmm2 +; SSE3-NEXT: movdqa %xmm0, %xmm3 +; SSE3-NEXT: pandn %xmm2, %xmm3 ; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE3-NEXT: pand %xmm2, %xmm1 -; SSE3-NEXT: pcmpeqd %xmm3, %xmm3 -; SSE3-NEXT: pxor %xmm1, %xmm3 +; SSE3-NEXT: psrlw $4, %xmm0 +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSE3-NEXT: pand %xmm1, %xmm0 ; SSE3-NEXT: pandn %xmm3, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; SSE3-NEXT: movdqa %xmm0, %xmm2 +; SSE3-NEXT: psrlw $1, %xmm2 +; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE3-NEXT: psubb %xmm2, %xmm0 +; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] ; SSE3-NEXT: movdqa %xmm0, %xmm3 -; SSE3-NEXT: pand %xmm1, %xmm3 +; SSE3-NEXT: pand %xmm2, %xmm3 ; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm3, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm1, %xmm0 ; SSE3-NEXT: pand %xmm2, %xmm0 +; SSE3-NEXT: paddb %xmm3, %xmm0 +; SSE3-NEXT: movdqa %xmm0, %xmm2 +; SSE3-NEXT: psrlw $4, %xmm2 +; SSE3-NEXT: paddb %xmm2, %xmm0 +; SSE3-NEXT: pand %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: testv16i8: @@ -1656,28 +1670,29 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind { ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: psrlw $2, %xmm1 ; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE2-NEXT: pxor %xmm1, %xmm2 +; SSE2-NEXT: movdqa %xmm0, %xmm3 +; SSE2-NEXT: pandn %xmm2, %xmm3 ; SSE2-NEXT: por %xmm1, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE2-NEXT: pand %xmm2, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm3, %xmm3 -; SSE2-NEXT: pxor %xmm1, %xmm3 +; SSE2-NEXT: psrlw $4, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: pandn %xmm3, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $1, %xmm1 -; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE2-NEXT: psubb %xmm1, %xmm0 -; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; 
SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrlw $1, %xmm2 +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE2-NEXT: psubb %xmm2, %xmm0 +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] ; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: pand %xmm1, %xmm3 +; SSE2-NEXT: pand %xmm2, %xmm3 ; SSE2-NEXT: psrlw $2, %xmm0 -; SSE2-NEXT: pand %xmm1, %xmm0 -; SSE2-NEXT: paddb %xmm3, %xmm0 -; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: psrlw $4, %xmm1 -; SSE2-NEXT: paddb %xmm1, %xmm0 ; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: paddb %xmm3, %xmm0 +; SSE2-NEXT: movdqa %xmm0, %xmm2 +; SSE2-NEXT: psrlw $4, %xmm2 +; SSE2-NEXT: paddb %xmm2, %xmm0 +; SSE2-NEXT: pand %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE3-LABEL: testv16i8u: @@ -1689,28 +1704,29 @@ define <16 x i8> @testv16i8u(<16 x i8> %in) nounwind { ; SSE3-NEXT: movdqa %xmm0, %xmm1 ; SSE3-NEXT: psrlw $2, %xmm1 ; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 +; SSE3-NEXT: pcmpeqd %xmm2, %xmm2 +; SSE3-NEXT: pxor %xmm1, %xmm2 +; SSE3-NEXT: movdqa %xmm0, %xmm3 +; SSE3-NEXT: pandn %xmm2, %xmm3 ; SSE3-NEXT: por %xmm1, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; SSE3-NEXT: pand %xmm2, %xmm1 -; SSE3-NEXT: pcmpeqd %xmm3, %xmm3 -; SSE3-NEXT: pxor %xmm1, %xmm3 +; SSE3-NEXT: psrlw $4, %xmm0 +; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; SSE3-NEXT: pand %xmm1, %xmm0 ; SSE3-NEXT: pandn %xmm3, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $1, %xmm1 -; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 -; SSE3-NEXT: psubb %xmm1, %xmm0 -; SSE3-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; SSE3-NEXT: movdqa %xmm0, %xmm2 +; SSE3-NEXT: psrlw $1, %xmm2 +; SSE3-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2 +; SSE3-NEXT: psubb %xmm2, %xmm0 +; SSE3-NEXT: movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] ; SSE3-NEXT: movdqa %xmm0, %xmm3 -; SSE3-NEXT: pand %xmm1, %xmm3 +; SSE3-NEXT: pand %xmm2, %xmm3 ; SSE3-NEXT: psrlw $2, %xmm0 -; SSE3-NEXT: pand %xmm1, %xmm0 -; SSE3-NEXT: paddb %xmm3, %xmm0 -; SSE3-NEXT: movdqa %xmm0, %xmm1 -; SSE3-NEXT: psrlw $4, %xmm1 -; SSE3-NEXT: paddb %xmm1, %xmm0 ; SSE3-NEXT: pand %xmm2, %xmm0 +; SSE3-NEXT: paddb %xmm3, %xmm0 +; SSE3-NEXT: movdqa %xmm0, %xmm2 +; SSE3-NEXT: psrlw $4, %xmm2 +; SSE3-NEXT: paddb %xmm2, %xmm0 +; SSE3-NEXT: pand %xmm1, %xmm0 ; SSE3-NEXT: retq ; ; SSSE3-LABEL: testv16i8u: diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll index 8c24aa50a626e..8fe00afe0c0bb 100644 --- a/llvm/test/CodeGen/X86/vector-lzcnt-512.ll +++ b/llvm/test/CodeGen/X86/vector-lzcnt-512.ll @@ -26,19 +26,18 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind { ; AVX512BW-NEXT: vpsrlq $8, %zmm0, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1 -; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1 -; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1) -; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3 -; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3 -; AVX512BW-NEXT: 
vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1) -; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1 -; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrlq $32, %zmm2, %zmm3 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3) +; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512BW-NEXT: # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512BW-NEXT: vpshufb %zmm2, %zmm5, %zmm2 +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1) +; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm0 +; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpshufb %zmm0, %zmm5, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -54,32 +53,31 @@ define <8 x i64> @testv8i64(<8 x i64> %in) nounwind { ; AVX512DQ-NEXT: vpsrlq $8, %zmm0, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1 -; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1 -; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1) -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm4 +; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm2 +; AVX512DQ-NEXT: vpsrlq $32, %zmm2, %zmm3 +; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1) +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm0 +; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm4 ; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1] ; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4 -; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1 -; AVX512DQ-NEXT: vpaddb %ymm4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpsrlq $32, %ymm0, %ymm6 -; AVX512DQ-NEXT: vpor %ymm6, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpandn %ymm3, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0 -; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2 -; AVX512DQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0 +; AVX512DQ-NEXT: vpaddb %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0 -; 
AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrlq $32, %ymm2, %ymm6 +; AVX512DQ-NEXT: vpor %ymm6, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpandn %ymm1, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2 +; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm1 +; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm2, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512DQ-NEXT: retq %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 0) ret <8 x i64> %out @@ -107,19 +105,18 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind { ; AVX512BW-NEXT: vpsrlq $8, %zmm0, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrlq $16, %zmm0, %zmm1 -; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrlq $32, %zmm0, %zmm1 -; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1 -; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1) -; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3 -; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3 -; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1) -; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1 -; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrlq $32, %zmm2, %zmm3 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3) +; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512BW-NEXT: # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512BW-NEXT: vpshufb %zmm2, %zmm5, %zmm2 +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1) +; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm0 +; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpshufb %zmm0, %zmm5, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: retq @@ -135,32 +132,31 @@ define <8 x i64> @testv8i64u(<8 x i64> %in) nounwind { ; AVX512DQ-NEXT: vpsrlq $8, %zmm0, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrlq $16, %zmm0, %zmm1 -; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpsrlq $32, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1 -; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1) -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm1 -; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm3 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm4 +; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm2 +; AVX512DQ-NEXT: vpsrlq $32, %zmm2, 
%zmm3 +; AVX512DQ-NEXT: vpternlogq {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1) +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm3, %ymm0 +; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm4 ; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512DQ-NEXT: # ymm5 = mem[0,1,0,1] ; AVX512DQ-NEXT: vpshufb %ymm4, %ymm5, %ymm4 -; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1 -; AVX512DQ-NEXT: vpaddb %ymm4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1 -; AVX512DQ-NEXT: vpsrlq $32, %ymm0, %ymm6 -; AVX512DQ-NEXT: vpor %ymm6, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpandn %ymm3, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpand %ymm1, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpshufb %ymm0, %ymm5, %ymm0 -; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpand %ymm3, %ymm2, %ymm2 -; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2 -; AVX512DQ-NEXT: vpaddb %ymm0, %ymm2, %ymm0 +; AVX512DQ-NEXT: vpaddb %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpxor %xmm4, %xmm4, %xmm4 ; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpsrlq $32, %ymm2, %ymm6 +; AVX512DQ-NEXT: vpor %ymm6, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpandn %ymm1, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpshufb %ymm2, %ymm5, %ymm2 +; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3 +; AVX512DQ-NEXT: vpand %ymm1, %ymm3, %ymm1 +; AVX512DQ-NEXT: vpshufb %ymm1, %ymm5, %ymm1 +; AVX512DQ-NEXT: vpaddb %ymm2, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpsadbw %ymm4, %ymm1, %ymm1 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512DQ-NEXT: retq %out = call <8 x i64> @llvm.ctlz.v8i64(<8 x i64> %in, i1 -1) ret <8 x i64> %out @@ -186,19 +182,18 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1 -; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1) -; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3 -; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3 -; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1) -; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1 -; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrld $16, %zmm2, %zmm3 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3) +; 
AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512BW-NEXT: # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512BW-NEXT: vpshufb %zmm2, %zmm5, %zmm2 +; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1) +; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm0 +; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpshufb %zmm0, %zmm5, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 @@ -216,39 +211,38 @@ define <16 x i32> @testv16i32(<16 x i32> %in) nounwind { ; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1 ; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1 -; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1) -; AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm2 +; AVX512DQ-NEXT: vpsrld $16, %zmm2, %zmm3 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm0 = ~(zmm0 | zmm3 | zmm1) +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm5 +; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm5 ; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512DQ-NEXT: # ymm6 = mem[0,1,0,1] ; AVX512DQ-NEXT: vpshufb %ymm5, %ymm6, %ymm5 -; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm3, %ymm6, %ymm3 -; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7] -; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7 -; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5] -; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpandn %ymm4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0 -; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm1 +; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpshufb %ymm1, %ymm6, %ymm1 -; AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7] +; AVX512DQ-NEXT: vpaddb %ymm5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm1[2],ymm5[2],ymm1[3],ymm5[3],ymm1[6],ymm5[6],ymm1[7],ymm5[7] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7 +; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm5[0],ymm1[1],ymm5[1],ymm1[4],ymm5[4],ymm1[5],ymm5[5] ; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpandn %ymm4, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpshufb %ymm2, %ymm6, %ymm2 +; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0 +; 
AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0 +; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5] ; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 0) ret <16 x i32> %out @@ -274,19 +268,18 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1 ; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $8, %zmm0, %zmm1 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = -1 -; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~zmm0 & (zmm3 ^ zmm1) -; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm3 -; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm3 -; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm2 & ~(zmm0 | zmm1) -; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] -; AVX512BW-NEXT: # zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] -; AVX512BW-NEXT: vpshufb %zmm2, %zmm0, %zmm1 -; AVX512BW-NEXT: vpshufb %zmm3, %zmm0, %zmm0 -; AVX512BW-NEXT: vpaddb %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm2 +; AVX512BW-NEXT: vpsrld $16, %zmm2, %zmm3 +; AVX512BW-NEXT: vpbroadcastb {{.*#+}} zmm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; AVX512BW-NEXT: vpternlogq {{.*#+}} zmm2 = zmm4 & ~(zmm2 | zmm3) +; AVX512BW-NEXT: vbroadcasti32x4 {{.*#+}} zmm5 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; AVX512BW-NEXT: # zmm5 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] +; AVX512BW-NEXT: vpshufb %zmm2, %zmm5, %zmm2 +; AVX512BW-NEXT: vpternlogd {{.*#+}} zmm3 = ~(zmm3 | zmm0 | zmm1) +; AVX512BW-NEXT: vpsrlw $4, %zmm3, %zmm0 +; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpshufb %zmm0, %zmm5, %zmm0 +; AVX512BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpunpckhdq {{.*#+}} zmm2 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15] ; AVX512BW-NEXT: vpsadbw %zmm1, %zmm2, %zmm2 @@ -304,39 +297,38 @@ define <16 x i32> @testv16i32u(<16 x i32> %in) nounwind { ; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1 ; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $8, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 -; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = -1 -; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm2 = ~zmm0 & (zmm2 ^ zmm1) -; 
AVX512DQ-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm2 +; AVX512DQ-NEXT: vpsrld $16, %zmm2, %zmm3 +; AVX512DQ-NEXT: vpternlogd {{.*#+}} zmm0 = ~(zmm0 | zmm3 | zmm1) +; AVX512DQ-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512DQ-NEXT: vpbroadcastb {{.*#+}} ymm4 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] -; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm5 +; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm5 ; AVX512DQ-NEXT: vbroadcasti128 {{.*#+}} ymm6 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] ; AVX512DQ-NEXT: # ymm6 = mem[0,1,0,1] ; AVX512DQ-NEXT: vpshufb %ymm5, %ymm6, %ymm5 -; AVX512DQ-NEXT: vpsrlw $4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpand %ymm4, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpshufb %ymm3, %ymm6, %ymm3 -; AVX512DQ-NEXT: vpaddb %ymm5, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 -; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm3[2],ymm5[2],ymm3[3],ymm5[3],ymm3[6],ymm5[6],ymm3[7],ymm5[7] -; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7 -; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm3 = ymm3[0],ymm5[0],ymm3[1],ymm5[1],ymm3[4],ymm5[4],ymm3[5],ymm5[5] -; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm3, %ymm3 -; AVX512DQ-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpandn %ymm4, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0 -; AVX512DQ-NEXT: vpsrlw $4, %ymm2, %ymm1 +; AVX512DQ-NEXT: vpsrlw $4, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpand %ymm4, %ymm1, %ymm1 ; AVX512DQ-NEXT: vpshufb %ymm1, %ymm6, %ymm1 -; AVX512DQ-NEXT: vpaddb %ymm0, %ymm1, %ymm0 -; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm1 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7] +; AVX512DQ-NEXT: vpaddb %ymm5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpxor %xmm5, %xmm5, %xmm5 +; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm7 = ymm1[2],ymm5[2],ymm1[3],ymm5[3],ymm1[6],ymm5[6],ymm1[7],ymm5[7] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm7, %ymm7 +; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm1 = ymm1[0],ymm5[0],ymm1[1],ymm5[1],ymm1[4],ymm5[4],ymm1[5],ymm5[5] ; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpackuswb %ymm7, %ymm1, %ymm1 +; AVX512DQ-NEXT: vpor %ymm3, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpandn %ymm4, %ymm2, %ymm2 +; AVX512DQ-NEXT: vpshufb %ymm2, %ymm6, %ymm2 +; AVX512DQ-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpand %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpshufb %ymm0, %ymm6, %ymm0 +; AVX512DQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpunpckhdq {{.*#+}} ymm2 = ymm0[2],ymm5[2],ymm0[3],ymm5[3],ymm0[6],ymm5[6],ymm0[7],ymm5[7] +; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm2, %ymm2 ; AVX512DQ-NEXT: vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm5[0],ymm0[1],ymm5[1],ymm0[4],ymm5[4],ymm0[5],ymm5[5] ; AVX512DQ-NEXT: vpsadbw %ymm5, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 -; AVX512DQ-NEXT: vinserti64x4 $1, %ymm3, %zmm0, %zmm0 +; AVX512DQ-NEXT: vpackuswb %ymm2, %ymm0, %ymm0 +; AVX512DQ-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: retq %out = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %in, i1 -1) ret <16 x i32> %out diff --git a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll index 1473da6aac5ea..555d033ac5ee4 100644 --- a/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll +++ b/llvm/test/CodeGen/X86/vector-lzcnt-sub128.ll @@ -17,12 +17,13 @@ define <2 x i32> @illegal_ctlz(<2 x i32> %v1) { ; CHECK-NEXT: por %xmm1, %xmm0 ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrld $8, %xmm1 -; CHECK-NEXT: por %xmm1, %xmm0 -; CHECK-NEXT: 
movdqa %xmm0, %xmm1 -; CHECK-NEXT: psrld $16, %xmm1 ; CHECK-NEXT: pcmpeqd %xmm2, %xmm2 ; CHECK-NEXT: pxor %xmm1, %xmm2 -; CHECK-NEXT: pandn %xmm2, %xmm0 +; CHECK-NEXT: movdqa %xmm0, %xmm3 +; CHECK-NEXT: pandn %xmm2, %xmm3 +; CHECK-NEXT: por %xmm1, %xmm0 +; CHECK-NEXT: psrld $16, %xmm0 +; CHECK-NEXT: pandn %xmm3, %xmm0 ; CHECK-NEXT: movdqa %xmm0, %xmm1 ; CHECK-NEXT: psrlw $1, %xmm1 ; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 diff --git a/llvm/test/CodeGen/X86/vector-popcnt-128.ll b/llvm/test/CodeGen/X86/vector-popcnt-128.ll index d8e955c93581e..c1d30b6d5a995 100644 --- a/llvm/test/CodeGen/X86/vector-popcnt-128.ll +++ b/llvm/test/CodeGen/X86/vector-popcnt-128.ll @@ -826,11 +826,11 @@ define <2 x i64> @ne_1_v2i64(<2 x i64> %0) { ; SSE-NEXT: pcmpgtd %xmm2, %xmm3 ; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm3[0,0,2,2] ; SSE-NEXT: pcmpeqd %xmm2, %xmm0 -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] -; SSE-NEXT: pand %xmm4, %xmm0 -; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3] -; SSE-NEXT: pxor %xmm1, %xmm2 -; SSE-NEXT: pandn %xmm2, %xmm0 +; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3] +; SSE-NEXT: pand %xmm4, %xmm2 +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3] +; SSE-NEXT: por %xmm2, %xmm0 +; SSE-NEXT: pxor %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX1OR2-LABEL: ne_1_v2i64: diff --git a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll index 55f2258aad018..97124f0a9d8d9 100644 --- a/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll +++ b/llvm/test/CodeGen/X86/vector-unsigned-cmp.ll @@ -117,9 +117,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: uge_v2i64: @@ -136,9 +136,9 @@ define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm1, %xmm2 -; SSE41-NEXT: pandn %xmm2, %xmm0 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: uge_v2i64: @@ -170,9 +170,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE2-NEXT: pand %xmm3, %xmm0 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE2-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE2-NEXT: pxor %xmm1, %xmm2 -; SSE2-NEXT: pandn %xmm2, %xmm0 +; SSE2-NEXT: por %xmm0, %xmm1 +; SSE2-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE2-NEXT: pxor %xmm1, %xmm0 ; SSE2-NEXT: retq ; ; SSE41-LABEL: ule_v2i64: @@ -189,9 +189,9 @@ define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) { ; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] ; SSE41-NEXT: pand %xmm3, %xmm0 ; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3] -; SSE41-NEXT: pcmpeqd %xmm2, %xmm2 -; SSE41-NEXT: pxor %xmm1, %xmm2 -; SSE41-NEXT: pandn %xmm2, %xmm0 +; SSE41-NEXT: por %xmm0, %xmm1 +; SSE41-NEXT: pcmpeqd %xmm0, %xmm0 +; SSE41-NEXT: pxor %xmm1, %xmm0 ; SSE41-NEXT: retq ; ; AVX-LABEL: ule_v2i64: From c95c10a91b7c657f514282bd821efb99c5c549d2 Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Wed, 22 Oct 2025 18:48:51 +0000 Subject: [PATCH 18/20] [LoongArch][SystemZ]: Updated tests 
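The LoongArch and SystemZ churn below comes from the ctlz expansion: the
single trailing not-of-or is now accumulated step by step as a complement
chain (nor/andn on LoongArch, vno/vnc on SystemZ) alongside the existing
or chain. A minimal IR sketch of the De Morgan identity the regenerated
checks exercise; the function names are illustrative and not taken from
this series:

  define i32 @not_of_or_chain(i32 %x, i32 %s1, i32 %s2) {
    %o1 = or i32 %x, %s1
    %o2 = or i32 %o1, %s2
    %r  = xor i32 %o2, -1            ; ~(x | s1 | s2)
    ret i32 %r
  }

  ; Equivalent by De Morgan: ~(x | s1 | s2) == ((~x & ~s1) & ~s2),
  ; which maps onto one nor plus repeated andn-class ops.
  define i32 @demorgan_chain(i32 %x, i32 %s1, i32 %s2) {
    %nx  = xor i32 %x, -1
    %ns1 = xor i32 %s1, -1
    %a1  = and i32 %nx, %ns1         ; nor(x, s1)
    %ns2 = xor i32 %s2, -1
    %a2  = and i32 %a1, %ns2         ; andn-style step: a1 & ~s2
    ret i32 %a2
  }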
--- .../test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll | 141 +++++++++++------- llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll | 20 ++- llvm/test/CodeGen/SystemZ/vec-eval.ll | 49 +++--- 3 files changed, 136 insertions(+), 74 deletions(-) diff --git a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll index 27be02c50f1c7..4c5eab036dbb4 100644 --- a/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/LoongArch/ctlz-cttz-ctpop.ll @@ -21,13 +21,15 @@ define i8 @test_ctlz_i8(i8 %a) nounwind { ; LA32R: # %bb.0: ; LA32R-NEXT: andi $a1, $a0, 254 ; LA32R-NEXT: srli.w $a1, $a1, 1 +; LA32R-NEXT: nor $a2, $a0, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: andi $a1, $a0, 252 ; LA32R-NEXT: srli.w $a1, $a1, 2 +; LA32R-NEXT: andn $a2, $a2, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: andi $a1, $a0, 240 -; LA32R-NEXT: srli.w $a1, $a1, 4 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: andi $a0, $a0, 240 +; LA32R-NEXT: srli.w $a0, $a0, 4 +; LA32R-NEXT: andn $a0, $a2, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: andi $a1, $a1, 85 ; LA32R-NEXT: sub.w $a0, $a0, $a1 @@ -60,23 +62,28 @@ define i8 @test_ctlz_i8(i8 %a) nounwind { define i16 @test_ctlz_i16(i16 %a) nounwind { ; LA32R-LABEL: test_ctlz_i16: ; LA32R: # %bb.0: +; LA32R-NEXT: srli.w $a1, $a0, 1 +; LA32R-NEXT: lu12i.w $a2, 7 +; LA32R-NEXT: ori $a2, $a2, 4095 +; LA32R-NEXT: and $a1, $a1, $a2 +; LA32R-NEXT: nor $a2, $a0, $zero +; LA32R-NEXT: andn $a2, $a2, $a1 +; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: lu12i.w $a1, 15 -; LA32R-NEXT: ori $a2, $a1, 4094 -; LA32R-NEXT: and $a2, $a0, $a2 -; LA32R-NEXT: srli.w $a2, $a2, 1 -; LA32R-NEXT: or $a0, $a0, $a2 -; LA32R-NEXT: ori $a2, $a1, 4092 -; LA32R-NEXT: and $a2, $a0, $a2 -; LA32R-NEXT: srli.w $a2, $a2, 2 -; LA32R-NEXT: or $a0, $a0, $a2 -; LA32R-NEXT: ori $a2, $a1, 4080 -; LA32R-NEXT: and $a2, $a0, $a2 -; LA32R-NEXT: srli.w $a2, $a2, 4 -; LA32R-NEXT: or $a0, $a0, $a2 +; LA32R-NEXT: ori $a3, $a1, 4092 +; LA32R-NEXT: and $a3, $a0, $a3 +; LA32R-NEXT: srli.w $a3, $a3, 2 +; LA32R-NEXT: andn $a2, $a2, $a3 +; LA32R-NEXT: or $a0, $a0, $a3 +; LA32R-NEXT: ori $a3, $a1, 4080 +; LA32R-NEXT: and $a3, $a0, $a3 +; LA32R-NEXT: srli.w $a3, $a3, 4 +; LA32R-NEXT: andn $a2, $a2, $a3 +; LA32R-NEXT: or $a0, $a0, $a3 ; LA32R-NEXT: ori $a1, $a1, 3840 -; LA32R-NEXT: and $a1, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a1, 8 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: and $a0, $a0, $a1 +; LA32R-NEXT: srli.w $a0, $a0, 8 +; LA32R-NEXT: andn $a0, $a2, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: lu12i.w $a2, 5 ; LA32R-NEXT: ori $a2, $a2, 1365 @@ -117,15 +124,19 @@ define i32 @test_ctlz_i32(i32 %a) nounwind { ; LA32R-LABEL: test_ctlz_i32: ; LA32R: # %bb.0: ; LA32R-NEXT: srli.w $a1, $a0, 1 +; LA32R-NEXT: nor $a2, $a0, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 2 +; LA32R-NEXT: andn $a2, $a2, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 4 +; LA32R-NEXT: andn $a2, $a2, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 8 +; LA32R-NEXT: andn $a2, $a2, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 16 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: srli.w $a0, $a0, 16 +; LA32R-NEXT: andn $a0, $a2, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: lu12i.w $a2, 349525 ; LA32R-NEXT: ori $a2, $a2, 1365 @@ -175,15 +186,19 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; LA32R-NEXT: bne $a1, $zero, .LBB3_2 ; LA32R-NEXT: # %bb.1: ; LA32R-NEXT: srli.w $a1, $a0, 1 +; LA32R-NEXT: nor $a6, $a0, $a1 ; 
LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 2 +; LA32R-NEXT: andn $a6, $a6, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 4 +; LA32R-NEXT: andn $a6, $a6, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 8 +; LA32R-NEXT: andn $a6, $a6, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 16 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: srli.w $a0, $a0, 16 +; LA32R-NEXT: andn $a0, $a6, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: and $a1, $a1, $a5 ; LA32R-NEXT: sub.w $a0, $a0, $a1 @@ -201,15 +216,19 @@ define i64 @test_ctlz_i64(i64 %a) nounwind { ; LA32R-NEXT: ret ; LA32R-NEXT: .LBB3_2: ; LA32R-NEXT: srli.w $a0, $a1, 1 +; LA32R-NEXT: nor $a6, $a1, $a0 ; LA32R-NEXT: or $a0, $a1, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 2 +; LA32R-NEXT: andn $a6, $a6, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 4 +; LA32R-NEXT: andn $a6, $a6, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 8 +; LA32R-NEXT: andn $a6, $a6, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 16 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: srli.w $a0, $a0, 16 +; LA32R-NEXT: andn $a0, $a6, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: and $a1, $a1, $a5 ; LA32R-NEXT: sub.w $a0, $a0, $a1 @@ -250,14 +269,17 @@ define i8 @test_not_ctlz_i8(i8 %a) nounwind { ; LA32R: # %bb.0: ; LA32R-NEXT: ori $a1, $zero, 254 ; LA32R-NEXT: andn $a1, $a1, $a0 +; LA32R-NEXT: nor $a2, $a0, $zero ; LA32R-NEXT: srli.w $a1, $a1, 1 +; LA32R-NEXT: nor $a2, $a2, $a1 ; LA32R-NEXT: orn $a0, $a1, $a0 ; LA32R-NEXT: andi $a1, $a0, 252 ; LA32R-NEXT: srli.w $a1, $a1, 2 +; LA32R-NEXT: andn $a2, $a2, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: andi $a1, $a0, 240 -; LA32R-NEXT: srli.w $a1, $a1, 4 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: andi $a0, $a0, 240 +; LA32R-NEXT: srli.w $a0, $a0, 4 +; LA32R-NEXT: andn $a0, $a2, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: andi $a1, $a1, 85 ; LA32R-NEXT: sub.w $a0, $a0, $a1 @@ -293,19 +315,22 @@ define i16 @test_not_ctlz_i16(i16 %a) nounwind { ; LA32R-NEXT: ori $a2, $a1, 4094 ; LA32R-NEXT: andn $a2, $a2, $a0 ; LA32R-NEXT: srli.w $a2, $a2, 1 +; LA32R-NEXT: andn $a3, $a0, $a2 ; LA32R-NEXT: orn $a0, $a2, $a0 ; LA32R-NEXT: ori $a2, $a1, 4092 ; LA32R-NEXT: and $a2, $a0, $a2 ; LA32R-NEXT: srli.w $a2, $a2, 2 +; LA32R-NEXT: andn $a3, $a3, $a2 ; LA32R-NEXT: or $a0, $a0, $a2 ; LA32R-NEXT: ori $a2, $a1, 4080 ; LA32R-NEXT: and $a2, $a0, $a2 ; LA32R-NEXT: srli.w $a2, $a2, 4 +; LA32R-NEXT: andn $a3, $a3, $a2 ; LA32R-NEXT: or $a0, $a0, $a2 ; LA32R-NEXT: ori $a1, $a1, 3840 -; LA32R-NEXT: and $a1, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a1, 8 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: and $a0, $a0, $a1 +; LA32R-NEXT: srli.w $a0, $a0, 8 +; LA32R-NEXT: andn $a0, $a3, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: lu12i.w $a2, 5 ; LA32R-NEXT: ori $a2, $a2, 1365 @@ -345,16 +370,20 @@ define i32 @test_not_ctlz_i32(i32 %a) nounwind { ; LA32R-LABEL: test_not_ctlz_i32: ; LA32R: # %bb.0: ; LA32R-NEXT: nor $a1, $a0, $zero -; LA32R-NEXT: srli.w $a1, $a1, 1 -; LA32R-NEXT: orn $a0, $a1, $a0 -; LA32R-NEXT: srli.w $a1, $a0, 2 -; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 4 -; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 8 -; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 16 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: srli.w $a2, $a1, 1 +; LA32R-NEXT: nor $a1, $a1, $a2 +; LA32R-NEXT: orn $a0, $a2, $a0 +; LA32R-NEXT: srli.w $a2, $a0, 2 +; LA32R-NEXT: andn $a1, 
$a1, $a2 +; LA32R-NEXT: or $a0, $a0, $a2 +; LA32R-NEXT: srli.w $a2, $a0, 4 +; LA32R-NEXT: andn $a1, $a1, $a2 +; LA32R-NEXT: or $a0, $a0, $a2 +; LA32R-NEXT: srli.w $a2, $a0, 8 +; LA32R-NEXT: andn $a1, $a1, $a2 +; LA32R-NEXT: or $a0, $a0, $a2 +; LA32R-NEXT: srli.w $a0, $a0, 16 +; LA32R-NEXT: andn $a0, $a1, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: lu12i.w $a2, 349525 ; LA32R-NEXT: ori $a2, $a2, 1365 @@ -406,16 +435,20 @@ define i64 @test_not_ctlz_i64(i64 %a) nounwind { ; LA32R-NEXT: bne $a6, $zero, .LBB7_2 ; LA32R-NEXT: # %bb.1: ; LA32R-NEXT: nor $a1, $a0, $zero -; LA32R-NEXT: srli.w $a1, $a1, 1 -; LA32R-NEXT: orn $a0, $a1, $a0 -; LA32R-NEXT: srli.w $a1, $a0, 2 -; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 4 -; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 8 -; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 16 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: srli.w $a6, $a1, 1 +; LA32R-NEXT: nor $a1, $a1, $a6 +; LA32R-NEXT: orn $a0, $a6, $a0 +; LA32R-NEXT: srli.w $a6, $a0, 2 +; LA32R-NEXT: andn $a1, $a1, $a6 +; LA32R-NEXT: or $a0, $a0, $a6 +; LA32R-NEXT: srli.w $a6, $a0, 4 +; LA32R-NEXT: andn $a1, $a1, $a6 +; LA32R-NEXT: or $a0, $a0, $a6 +; LA32R-NEXT: srli.w $a6, $a0, 8 +; LA32R-NEXT: andn $a1, $a1, $a6 +; LA32R-NEXT: or $a0, $a0, $a6 +; LA32R-NEXT: srli.w $a0, $a0, 16 +; LA32R-NEXT: andn $a0, $a1, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: and $a1, $a1, $a5 ; LA32R-NEXT: sub.w $a0, $a0, $a1 @@ -433,15 +466,19 @@ define i64 @test_not_ctlz_i64(i64 %a) nounwind { ; LA32R-NEXT: ret ; LA32R-NEXT: .LBB7_2: ; LA32R-NEXT: srli.w $a0, $a6, 1 +; LA32R-NEXT: nor $a6, $a6, $a0 ; LA32R-NEXT: orn $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 2 +; LA32R-NEXT: andn $a6, $a6, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 4 +; LA32R-NEXT: andn $a6, $a6, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 ; LA32R-NEXT: srli.w $a1, $a0, 8 +; LA32R-NEXT: andn $a6, $a6, $a1 ; LA32R-NEXT: or $a0, $a0, $a1 -; LA32R-NEXT: srli.w $a1, $a0, 16 -; LA32R-NEXT: nor $a0, $a0, $a1 +; LA32R-NEXT: srli.w $a0, $a0, 16 +; LA32R-NEXT: andn $a0, $a6, $a0 ; LA32R-NEXT: srli.w $a1, $a0, 1 ; LA32R-NEXT: and $a1, $a1, $a5 ; LA32R-NEXT: sub.w $a0, $a0, $a1 diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll index 2c3bf944cdf89..9ff15f946d2d6 100644 --- a/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll +++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-02.ll @@ -11,25 +11,31 @@ define i128 @f1(i128 %a) { ; CHECK-NEXT: vl %v0, 0(%r3), 3 ; CHECK-NEXT: vrepib %v1, 1 ; CHECK-NEXT: vsrl %v1, %v0, %v1 +; CHECK-NEXT: vno %v2, %v0, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 2 ; CHECK-NEXT: vsrl %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 4 ; CHECK-NEXT: vsrl %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 8 ; CHECK-NEXT: vsrlb %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 16 ; CHECK-NEXT: vsrlb %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 32 ; CHECK-NEXT: vsrlb %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 64 -; CHECK-NEXT: vsrlb %v1, %v0, %v1 -; CHECK-NEXT: vno %v0, %v0, %v1 +; CHECK-NEXT: vsrlb %v0, %v0, %v1 +; CHECK-NEXT: vnc %v0, %v2, %v0 ; CHECK-NEXT: vpopct %v0, %v0, 0 ; CHECK-NEXT: vgbm %v1, 0 ; CHECK-NEXT: vsumb %v0, %v0, %v1 
@@ -47,25 +53,31 @@ define i128 @f2(i128 %a) { ; CHECK-NEXT: vl %v0, 0(%r3), 3 ; CHECK-NEXT: vrepib %v1, 1 ; CHECK-NEXT: vsrl %v1, %v0, %v1 +; CHECK-NEXT: vno %v2, %v0, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 2 ; CHECK-NEXT: vsrl %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 4 ; CHECK-NEXT: vsrl %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 8 ; CHECK-NEXT: vsrlb %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 16 ; CHECK-NEXT: vsrlb %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 32 ; CHECK-NEXT: vsrlb %v1, %v0, %v1 +; CHECK-NEXT: vnc %v2, %v2, %v1 ; CHECK-NEXT: vo %v0, %v0, %v1 ; CHECK-NEXT: vrepib %v1, 64 -; CHECK-NEXT: vsrlb %v1, %v0, %v1 -; CHECK-NEXT: vno %v0, %v0, %v1 +; CHECK-NEXT: vsrlb %v0, %v0, %v1 +; CHECK-NEXT: vnc %v0, %v2, %v0 ; CHECK-NEXT: vpopct %v0, %v0, 0 ; CHECK-NEXT: vgbm %v1, 0 ; CHECK-NEXT: vsumb %v0, %v0, %v1 diff --git a/llvm/test/CodeGen/SystemZ/vec-eval.ll b/llvm/test/CodeGen/SystemZ/vec-eval.ll index bcdedcd3a407b..417fcb90af9a3 100644 --- a/llvm/test/CodeGen/SystemZ/vec-eval.ll +++ b/llvm/test/CodeGen/SystemZ/vec-eval.ll @@ -1889,7 +1889,9 @@ entry: define <16 x i8> @eval128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval128: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 128 +; CHECK-NEXT: vno %v0, %v24, %v24 +; CHECK-NEXT: vno %v1, %v26, %v26 +; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 2 ; CHECK-NEXT: br %r14 entry: %and.demorgan = or <16 x i8> %src2, %src1 @@ -1901,9 +1903,10 @@ entry: define <16 x i8> @eval129(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval129: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v0, %v26, %v24 +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 ; CHECK-NEXT: vn %v1, %v26, %v24 -; CHECK-NEXT: veval %v24, %v1, %v28, %v0, 139 +; CHECK-NEXT: vsel %v24, %v1, %v0, %v28 ; CHECK-NEXT: br %r14 entry: %and.demorgan = or <16 x i8> %src2, %src1 @@ -2034,8 +2037,10 @@ entry: define <16 x i8> @eval138(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval138: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 127 -; CHECK-NEXT: veval %v24, %v24, %v28, %v0, 174 +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 +; CHECK-NEXT: vnc %v1, %v24, %v28 +; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47 ; CHECK-NEXT: br %r14 entry: %not2 = xor <16 x i8> %src3, splat(i8 -1) @@ -2050,9 +2055,10 @@ entry: define <16 x i8> @eval139(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval139: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v0, %v26, %v24 +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 ; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 11 -; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143 +; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47 ; CHECK-NEXT: br %r14 entry: %0 = or <16 x i8> %src2, %src1 @@ -2068,8 +2074,10 @@ entry: define <16 x i8> @eval140(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval140: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 127 -; CHECK-NEXT: veval %v24, %v24, %v26, %v0, 174 +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: veval %v0, %v28, %v0, %v24, 40 +; CHECK-NEXT: vnc %v1, %v24, %v26 +; CHECK-NEXT: veval %v24, %v1, %v0, %v26, 47 ; CHECK-NEXT: 
br %r14 entry: %not1 = xor <16 x i8> %src2, splat(i8 -1) @@ -2084,10 +2092,11 @@ entry: define <16 x i8> @eval141(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval141: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vgbm %v0, 65535 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1 -; CHECK-NEXT: vo %v0, %v26, %v24 +; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 ; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47 -; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143 +; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47 ; CHECK-NEXT: br %r14 entry: %not1 = xor <16 x i8> %src2, splat(i8 -1) @@ -2105,9 +2114,10 @@ entry: define <16 x i8> @eval142(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval142: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 127 -; CHECK-NEXT: vn %v1, %v28, %v26 -; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174 +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 +; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 14 +; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47 ; CHECK-NEXT: br %r14 entry: %0 = or <16 x i8> %src2, %src1 @@ -2441,8 +2451,10 @@ entry: define <16 x i8> @eval162(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval162: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: veval %v0, %v28, %v24, %v26, 127 -; CHECK-NEXT: veval %v24, %v26, %v28, %v0, 174 +; CHECK-NEXT: vgbm %v1, 65535 +; CHECK-NEXT: vno %v0, %v28, %v28 +; CHECK-NEXT: veval %v1, %v24, %v1, %v28, 40 +; CHECK-NEXT: vsel %v24, %v0, %v1, %v26 ; CHECK-NEXT: br %r14 entry: %not2 = xor <16 x i8> %src3, splat(i8 -1) @@ -2457,9 +2469,10 @@ entry: define <16 x i8> @eval163(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) { ; CHECK-LABEL: eval163: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vo %v0, %v26, %v24 +; CHECK-NEXT: vgbm %v0, 65535 +; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40 ; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 11 -; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143 +; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47 ; CHECK-NEXT: br %r14 entry: %0 = or <16 x i8> %src2, %src1 From 3681932bdb9559e53e4f31f4a53cf7af43f361e9 Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Wed, 22 Oct 2025 19:05:28 +0000 Subject: [PATCH 19/20] [AArch64][PowerPC]: Reverting some updates --- llvm/test/CodeGen/AArch64/eon.ll | 9 +++ llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll | 1 - .../CodeGen/PowerPC/vec_veqv_vnand_vorc.ll | 20 ++---- llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll | 65 ------------------- 4 files changed, 14 insertions(+), 81 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/eon.ll b/llvm/test/CodeGen/AArch64/eon.ll index ea0e0122d9b6d..f939b4901be09 100644 --- a/llvm/test/CodeGen/AArch64/eon.ll +++ b/llvm/test/CodeGen/AArch64/eon.ll @@ -36,6 +36,10 @@ entry: ; Check that eon is generated if the xor is a disjoint or. define i64 @disjoint_or(i64 %a, i64 %b) { +; CHECK-LABEL: disjoint_or: +; CHECK: // %bb.0: +; CHECK-NEXT: eon x0, x0, x1 +; CHECK-NEXT: ret %or = or disjoint i64 %a, %b %eon = xor i64 %or, -1 ret i64 %eon @@ -43,6 +47,11 @@ define i64 @disjoint_or(i64 %a, i64 %b) { ; Check that eon is *not* generated if the or is not disjoint. 
define i64 @normal_or(i64 %a, i64 %b) { +; CHECK-LABEL: normal_or: +; CHECK: // %bb.0: +; CHECK-NEXT: orr x8, x0, x1 +; CHECK-NEXT: mvn x0, x8 +; CHECK-NEXT: ret %or = or i64 %a, %b %not = xor i64 %or, -1 ret i64 %not diff --git a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll index ed8dc504f026a..bea24ee98336d 100644 --- a/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll +++ b/llvm/test/CodeGen/PowerPC/eqv-andc-orc-nor.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | \ ; RUN: grep eqv | count 3 ; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | \ diff --git a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll index 310f0a66aa9b9..c23daac80279b 100644 --- a/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll +++ b/llvm/test/CodeGen/PowerPC/vec_veqv_vnand_vorc.ll @@ -1,39 +1,29 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; Check the miscellaneous logical vector operations added in P8 -; +; ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=-vsx < %s | FileCheck %s ; Test x eqv y define <4 x i32> @test_veqv(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: test_veqv: -; CHECK: # %bb.0: -; CHECK-NEXT: veqv 2, 2, 3 -; CHECK-NEXT: blr %tmp = xor <4 x i32> %x, %y %ret_val = xor <4 x i32> %tmp, < i32 -1, i32 -1, i32 -1, i32 -1> ret <4 x i32> %ret_val +; CHECK: veqv 2, 2, 3 } ; Test x vnand y define <4 x i32> @test_vnand(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: test_vnand: -; CHECK: # %bb.0: -; CHECK-NEXT: vnand 2, 2, 3 -; CHECK-NEXT: blr %tmp = and <4 x i32> %x, %y %ret_val = xor <4 x i32> %tmp, ret <4 x i32> %ret_val +; CHECK: vnand 2, 2, 3 } ; Test x vorc y and variants define <4 x i32> @test_vorc(<4 x i32> %x, <4 x i32> %y) nounwind { -; CHECK-LABEL: test_vorc: -; CHECK: # %bb.0: -; CHECK-NEXT: vorc 3, 2, 3 -; CHECK-NEXT: vorc 2, 2, 3 -; CHECK-NEXT: blr %tmp1 = xor <4 x i32> %y, %tmp2 = or <4 x i32> %x, %tmp1 +; CHECK: vorc 3, 2, 3 %tmp3 = xor <4 x i32> %tmp2, %tmp4 = or <4 x i32> %tmp3, %x +; CHECK: vorc 2, 2, 3 ret <4 x i32> %tmp4 } diff --git a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll index 7f7a52fe7de65..ba74df956e71e 100644 --- a/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll +++ b/llvm/test/CodeGen/PowerPC/xxeval-and-nand.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 ; RUN: llc -verify-machineinstrs -mcpu=pwr10 -mtriple=powerpc64le-unknown-unknown \ ; RUN: -ppc-asm-full-reg-names --ppc-vsr-nums-as-vr < %s | FileCheck %s @@ -7,10 +6,6 @@ ; CHECK: xxlandc v2, v2, v3 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_not(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_not: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxlandc v2, v2, v3 -; CHECK-NEXT: blr entry: %neg = xor <4 x i32> %B, %and = and <4 x i32> %neg, %A @@ -22,10 +17,6 @@ entry: ; CHECK: xxeval v2, v3, v2, v4, 1 ; CHECK-NEXT: blr define dso_local <16 x i8> @and_and8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_and8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v3, v2, v4, 1 -; CHECK-NEXT: blr entry: %and = and <16 x i8> %B, %A %and1 = and <16 x i8> %and, %C @@ -37,10 +28,6 @@ entry: 
; CHECK: xxeval v2, v3, v2, v4, 1 ; CHECK-NEXT: blr define dso_local <8 x i16> @and_and16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_and16: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v3, v2, v4, 1 -; CHECK-NEXT: blr entry: %and = and <8 x i16> %B, %A %and1 = and <8 x i16> %and, %C @@ -52,10 +39,6 @@ entry: ; CHECK: xxeval v2, v3, v2, v4, 1 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_and32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_and32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v3, v2, v4, 1 -; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %A %and1 = and <4 x i32> %and, %C @@ -67,10 +50,6 @@ entry: ; CHECK: xxeval v2, v3, v2, v4, 1 ; CHECK-NEXT: blr define dso_local <2 x i64> @and_and64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_and64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v3, v2, v4, 1 -; CHECK-NEXT: blr entry: %and = and <2 x i64> %B, %A %and1 = and <2 x i64> %and, %C @@ -82,10 +61,6 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 14 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_nand: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v4, v3, 14 -; CHECK-NEXT: blr entry: %and = and <4 x i32> %C, %B %neg = xor <4 x i32> %and, @@ -98,10 +73,6 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 7 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_or: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v4, v3, 7 -; CHECK-NEXT: blr entry: %or = or <4 x i32> %C, %B %and = and <4 x i32> %or, %A @@ -113,10 +84,6 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 8 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_nor: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v4, v3, 8 -; CHECK-NEXT: blr entry: %or = or <4 x i32> %C, %B %neg = xor <4 x i32> %or, @@ -129,10 +96,6 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 6 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_xor: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v4, v3, 6 -; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %C, %B %and = and <4 x i32> %xor, %A @@ -144,10 +107,6 @@ entry: ; CHECK: xxeval v2, v2, v3, v4, 9 ; CHECK-NEXT: blr define dso_local <4 x i32> @and_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: and_eqv: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v3, v4, 9 -; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %B, %neg = xor <4 x i32> %xor, %C @@ -160,10 +119,6 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 241 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_nand(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: nand_nand: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v4, v3, 241 -; CHECK-NEXT: blr entry: %and = and <4 x i32> %C, %B %A.not = xor <4 x i32> %A, @@ -176,10 +131,6 @@ entry: ; CHECK: xxeval v2, v3, v2, v4, 254 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_and(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: nand_and: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v3, v2, v4, 254 -; CHECK-NEXT: blr entry: %and = and <4 x i32> %B, %A %and1 = and <4 x 
i32> %and, %C @@ -192,10 +143,6 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 249 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_xor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: nand_xor: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v4, v3, 249 -; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %C, %B %and = and <4 x i32> %xor, %A @@ -208,10 +155,6 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 246 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_eqv(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: nand_eqv: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v4, v3, 246 -; CHECK-NEXT: blr entry: %xor = xor <4 x i32> %C, %B %A.not = xor <4 x i32> %A, @@ -224,10 +167,6 @@ entry: ; CHECK: xxeval v2, v2, v4, v3, 248 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_or(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: nand_or: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v4, v3, 248 -; CHECK-NEXT: blr entry: %or = or <4 x i32> %C, %B %and = and <4 x i32> %or, %A @@ -240,10 +179,6 @@ entry: ; CHECK: xxeval v2, v2, v3, v4, 247 ; CHECK-NEXT: blr define dso_local <4 x i32> @nand_nor(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) local_unnamed_addr #0 { -; CHECK-LABEL: nand_nor: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xxeval v2, v2, v3, v4, 247 -; CHECK-NEXT: blr entry: %A.not = xor <4 x i32> %A, %or = or <4 x i32> %A.not, %B From b3ec648bec97aa472e73a7cb4bcc93c8bbbfbf5d Mon Sep 17 00:00:00 2001 From: Kevin Per Date: Wed, 22 Oct 2025 19:06:41 +0000 Subject: [PATCH 20/20] [X86]: Removed comment --- llvm/lib/Target/X86/X86ISelLowering.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index e870514db2443..16f6d31728717 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -55615,13 +55615,6 @@ static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, // Folds for better commutativity: if (N1->hasOneUse()) { - /* - // ANDNP(x,NOT(y)) -> AND(NOT(x),NOT(y)) -> NOT(OR(X,Y)). - if (SDValue Not = IsNOT(N1, DAG)) - return DAG.getNOT( - DL, DAG.getNode(ISD::OR, DL, VT, N0, DAG.getBitcast(VT, Not)), VT); - */ - // ANDNP(x,PSHUFB(y,z)) -> PSHUFB(y,OR(z,x)) // Zero out elements by setting the PSHUFB mask value to 0xFF. if (DAG.ComputeNumSignBits(N0) == EltSizeInBits) {
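For reference, the block deleted above was already commented out, so its
removal is not a functional change; it described the alternative fold
ANDNP(x, NOT(y)) -> NOT(OR(x, y)). A small IR sketch of the underlying
De Morgan equivalence, for illustration only (the function names are
ours, not from the patch):

  define <4 x i32> @andnp_of_not(<4 x i32> %x, <4 x i32> %y) {
    %nx = xor <4 x i32> %x, splat(i32 -1)
    %ny = xor <4 x i32> %y, splat(i32 -1)
    %r  = and <4 x i32> %nx, %ny          ; ~x & ~y, i.e. ANDNP(x, NOT(y))
    ret <4 x i32> %r
  }

  define <4 x i32> @not_of_or(<4 x i32> %x, <4 x i32> %y) {
    %o = or <4 x i32> %x, %y
    %r = xor <4 x i32> %o, splat(i32 -1)  ; NOT(OR(x, y))
    ret <4 x i32> %r
  }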