|
50 | 50 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
51 | 51 | #include "llvm/CodeGen/MachineMemOperand.h" |
52 | 52 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 53 | +#include "llvm/CodeGen/SDPatternMatch.h" |
53 | 54 | #include "llvm/CodeGen/SelectionDAG.h" |
54 | 55 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
55 | 56 | #include "llvm/CodeGen/TargetCallingConv.h" |
|
104 | 105 | #include <vector> |
105 | 106 |
|
106 | 107 | using namespace llvm; |
107 | | -using namespace llvm::PatternMatch; |
108 | 108 |
|
109 | 109 | #define DEBUG_TYPE "aarch64-lower" |
110 | 110 |
|
@@ -1174,6 +1174,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, |
1174 | 1174 |
|
1175 | 1175 | setTargetDAGCombine(ISD::SHL); |
1176 | 1176 | setTargetDAGCombine(ISD::VECTOR_DEINTERLEAVE); |
| 1177 | + setTargetDAGCombine(ISD::CTPOP); |
1177 | 1178 |
|
1178 | 1179 | // In case of strict alignment, avoid an excessive number of byte wide stores. |
1179 | 1180 | MaxStoresPerMemsetOptSize = 8; |
@@ -17555,6 +17556,7 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion( |
17555 | 17556 | // udot instruction. |
17556 | 17557 | if (SrcWidth * 4 <= DstWidth) { |
17557 | 17558 | if (all_of(I->users(), [&](auto *U) { |
| 17559 | + using namespace llvm::PatternMatch; |
17558 | 17560 | auto *SingleUser = cast<Instruction>(&*U); |
17559 | 17561 | if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value())))) |
17560 | 17562 | return true; |
@@ -17826,6 +17828,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad( |
17826 | 17828 | // into shift / and masks. For the moment we do this just for uitofp (not |
17827 | 17829 | // zext) to avoid issues with widening instructions. |
17828 | 17830 | if (Shuffles.size() == 4 && all_of(Shuffles, [](ShuffleVectorInst *SI) { |
| 17831 | + using namespace llvm::PatternMatch; |
17829 | 17832 | return SI->hasOneUse() && match(SI->user_back(), m_UIToFP(m_Value())) && |
17830 | 17833 | SI->getType()->getScalarSizeInBits() * 4 == |
17831 | 17834 | SI->user_back()->getType()->getScalarSizeInBits(); |
@@ -27842,6 +27845,35 @@ static SDValue performRNDRCombine(SDNode *N, SelectionDAG &DAG) { |
27842 | 27845 | {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL); |
27843 | 27846 | } |
27844 | 27847 |
|
| 27848 | +static SDValue performCTPOPCombine(SDNode *N, |
| 27849 | + TargetLowering::DAGCombinerInfo &DCI, |
| 27850 | + SelectionDAG &DAG) { |
| 27851 | + using namespace llvm::SDPatternMatch; |
| 27852 | + if (!DCI.isBeforeLegalize()) |
| 27853 | + return SDValue(); |
| 27854 | + |
| 27855 | + // ctpop(zext(bitcast(vector_mask))) -> neg(signed_reduce_add(vector_mask)) |
| 27856 | + SDValue Mask; |
| 27857 | + if (!sd_match(N->getOperand(0), m_ZExt(m_BitCast(m_Value(Mask))))) |
| 27858 | + return SDValue(); |
| 27859 | + |
| 27860 | + EVT VT = N->getValueType(0); |
| 27861 | + EVT MaskVT = Mask.getValueType(); |
| 27862 | + |
| 27863 | + if (VT.isVector() || !MaskVT.isFixedLengthVector() || |
| 27864 | + MaskVT.getVectorElementType() != MVT::i1) |
| 27865 | + return SDValue(); |
| 27866 | + |
| 27867 | + EVT ReduceInVT = |
| 27868 | + EVT::getVectorVT(*DAG.getContext(), VT, MaskVT.getVectorElementCount()); |
| 27869 | + |
| 27870 | + SDLoc DL(N); |
| 27871 | + // Sign extend to best fit ZeroOrNegativeOneBooleanContent. |
| 27872 | + SDValue ExtMask = DAG.getNode(ISD::SIGN_EXTEND, DL, ReduceInVT, Mask); |
| 27873 | + SDValue NegPopCount = DAG.getNode(ISD::VECREDUCE_ADD, DL, VT, ExtMask); |
| 27874 | + return DAG.getNegative(NegPopCount, DL, VT); |
| 27875 | +} |
| 27876 | + |
27845 | 27877 | SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, |
27846 | 27878 | DAGCombinerInfo &DCI) const { |
27847 | 27879 | SelectionDAG &DAG = DCI.DAG; |
@@ -28187,6 +28219,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, |
28187 | 28219 | return performScalarToVectorCombine(N, DCI, DAG); |
28188 | 28220 | case ISD::SHL: |
28189 | 28221 | return performSHLCombine(N, DCI, DAG); |
| 28222 | + case ISD::CTPOP: |
| 28223 | + return performCTPOPCombine(N, DCI, DAG); |
28190 | 28224 | } |
28191 | 28225 | return SDValue(); |
28192 | 28226 | } |
|
0 commit comments