diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 847b7af5a9b11..26b5e5a22386e 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3965,31 +3965,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     return;
     // Other cases are autogenerated.
     break;
-  case ARMISD::WLSSETUP: {
-    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
-                                         N->getOperand(0));
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
-  case ARMISD::WLS: {
-    SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
-                                         N->getOperand(1), N->getOperand(2),
-                                         N->getOperand(0));
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
-  case ARMISD::LE: {
-    SDValue Ops[] = { N->getOperand(1),
-                      N->getOperand(2),
-                      N->getOperand(0) };
-    unsigned Opc = ARM::t2LoopEnd;
-    SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
-    ReplaceUses(N, New);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
   case ARMISD::LDRD: {
     if (Subtarget->isThumb2())
       break; // TableGen handles isel in this case.
@@ -4043,17 +4018,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     CurDAG->RemoveDeadNode(N);
     return;
   }
-  case ARMISD::LOOP_DEC: {
-    SDValue Ops[] = { N->getOperand(1),
-                      N->getOperand(2),
-                      N->getOperand(0) };
-    SDNode *Dec =
-        CurDAG->getMachineNode(ARM::t2LoopDec, dl,
-                               CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
-    ReplaceUses(N, Dec);
-    CurDAG->RemoveDeadNode(N);
-    return;
-  }
   case ARMISD::BRCOND: {
     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index f28640ce7b107..cd8d7a0bee5e3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1556,220 +1556,6 @@ ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
   return std::make_pair(RRC, Cost);
 }
 
-const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
-#define MAKE_CASE(V)                                                           \
-  case V:                                                                      \
-    return #V;
-  switch ((ARMISD::NodeType)Opcode) {
-  case ARMISD::FIRST_NUMBER:
-    break;
-    MAKE_CASE(ARMISD::Wrapper)
-    MAKE_CASE(ARMISD::WrapperPIC)
-    MAKE_CASE(ARMISD::WrapperJT)
-    MAKE_CASE(ARMISD::COPY_STRUCT_BYVAL)
-    MAKE_CASE(ARMISD::CALL)
-    MAKE_CASE(ARMISD::CALL_PRED)
-    MAKE_CASE(ARMISD::CALL_NOLINK)
-    MAKE_CASE(ARMISD::tSECALL)
-    MAKE_CASE(ARMISD::t2CALL_BTI)
-    MAKE_CASE(ARMISD::BRCOND)
-    MAKE_CASE(ARMISD::BR_JT)
-    MAKE_CASE(ARMISD::BR2_JT)
-    MAKE_CASE(ARMISD::RET_GLUE)
-    MAKE_CASE(ARMISD::SERET_GLUE)
-    MAKE_CASE(ARMISD::INTRET_GLUE)
-    MAKE_CASE(ARMISD::PIC_ADD)
-    MAKE_CASE(ARMISD::CMP)
-    MAKE_CASE(ARMISD::CMN)
-    MAKE_CASE(ARMISD::CMPZ)
-    MAKE_CASE(ARMISD::CMPFP)
-    MAKE_CASE(ARMISD::CMPFPE)
-    MAKE_CASE(ARMISD::CMPFPw0)
-    MAKE_CASE(ARMISD::CMPFPEw0)
-    MAKE_CASE(ARMISD::BCC_i64)
-    MAKE_CASE(ARMISD::FMSTAT)
-    MAKE_CASE(ARMISD::CMOV)
-    MAKE_CASE(ARMISD::SSAT)
-    MAKE_CASE(ARMISD::USAT)
-    MAKE_CASE(ARMISD::ASRL)
-    MAKE_CASE(ARMISD::LSRL)
-    MAKE_CASE(ARMISD::LSLL)
-    MAKE_CASE(ARMISD::LSLS)
-    MAKE_CASE(ARMISD::LSRS1)
-    MAKE_CASE(ARMISD::ASRS1)
-    MAKE_CASE(ARMISD::RRX)
-    MAKE_CASE(ARMISD::ADDC)
-    MAKE_CASE(ARMISD::ADDE)
-    MAKE_CASE(ARMISD::SUBC)
-    MAKE_CASE(ARMISD::SUBE)
-    MAKE_CASE(ARMISD::VMOVRRD)
-    MAKE_CASE(ARMISD::VMOVDRR)
-    MAKE_CASE(ARMISD::VMOVhr)
-    MAKE_CASE(ARMISD::VMOVrh)
-    MAKE_CASE(ARMISD::VMOVSR)
-    MAKE_CASE(ARMISD::EH_SJLJ_SETJMP)
-    MAKE_CASE(ARMISD::EH_SJLJ_LONGJMP)
-    MAKE_CASE(ARMISD::EH_SJLJ_SETUP_DISPATCH)
-    MAKE_CASE(ARMISD::TC_RETURN)
-    MAKE_CASE(ARMISD::THREAD_POINTER)
-    MAKE_CASE(ARMISD::DYN_ALLOC)
-    MAKE_CASE(ARMISD::MEMBARRIER_MCR)
-    MAKE_CASE(ARMISD::PRELOAD)
-    MAKE_CASE(ARMISD::LDRD)
-    MAKE_CASE(ARMISD::STRD)
-    MAKE_CASE(ARMISD::WIN__CHKSTK)
-    MAKE_CASE(ARMISD::WIN__DBZCHK)
-    MAKE_CASE(ARMISD::PREDICATE_CAST)
-    MAKE_CASE(ARMISD::VECTOR_REG_CAST)
-    MAKE_CASE(ARMISD::MVESEXT)
-    MAKE_CASE(ARMISD::MVEZEXT)
-    MAKE_CASE(ARMISD::MVETRUNC)
-    MAKE_CASE(ARMISD::VCMP)
-    MAKE_CASE(ARMISD::VCMPZ)
-    MAKE_CASE(ARMISD::VTST)
-    MAKE_CASE(ARMISD::VSHLs)
-    MAKE_CASE(ARMISD::VSHLu)
-    MAKE_CASE(ARMISD::VSHLIMM)
-    MAKE_CASE(ARMISD::VSHRsIMM)
-    MAKE_CASE(ARMISD::VSHRuIMM)
-    MAKE_CASE(ARMISD::VRSHRsIMM)
-    MAKE_CASE(ARMISD::VRSHRuIMM)
-    MAKE_CASE(ARMISD::VRSHRNIMM)
-    MAKE_CASE(ARMISD::VQSHLsIMM)
-    MAKE_CASE(ARMISD::VQSHLuIMM)
-    MAKE_CASE(ARMISD::VQSHLsuIMM)
-    MAKE_CASE(ARMISD::VQSHRNsIMM)
-    MAKE_CASE(ARMISD::VQSHRNuIMM)
-    MAKE_CASE(ARMISD::VQSHRNsuIMM)
-    MAKE_CASE(ARMISD::VQRSHRNsIMM)
-    MAKE_CASE(ARMISD::VQRSHRNuIMM)
-    MAKE_CASE(ARMISD::VQRSHRNsuIMM)
-    MAKE_CASE(ARMISD::VSLIIMM)
-    MAKE_CASE(ARMISD::VSRIIMM)
-    MAKE_CASE(ARMISD::VGETLANEu)
-    MAKE_CASE(ARMISD::VGETLANEs)
-    MAKE_CASE(ARMISD::VMOVIMM)
-    MAKE_CASE(ARMISD::VMVNIMM)
-    MAKE_CASE(ARMISD::VMOVFPIMM)
-    MAKE_CASE(ARMISD::VDUP)
-    MAKE_CASE(ARMISD::VDUPLANE)
-    MAKE_CASE(ARMISD::VEXT)
-    MAKE_CASE(ARMISD::VREV64)
-    MAKE_CASE(ARMISD::VREV32)
-    MAKE_CASE(ARMISD::VREV16)
-    MAKE_CASE(ARMISD::VZIP)
-    MAKE_CASE(ARMISD::VUZP)
-    MAKE_CASE(ARMISD::VTRN)
-    MAKE_CASE(ARMISD::VTBL1)
-    MAKE_CASE(ARMISD::VTBL2)
-    MAKE_CASE(ARMISD::VMOVN)
-    MAKE_CASE(ARMISD::VQMOVNs)
-    MAKE_CASE(ARMISD::VQMOVNu)
-    MAKE_CASE(ARMISD::VCVTN)
-    MAKE_CASE(ARMISD::VCVTL)
-    MAKE_CASE(ARMISD::VIDUP)
-    MAKE_CASE(ARMISD::VMULLs)
-    MAKE_CASE(ARMISD::VMULLu)
-    MAKE_CASE(ARMISD::VQDMULH)
-    MAKE_CASE(ARMISD::VADDVs)
-    MAKE_CASE(ARMISD::VADDVu)
-    MAKE_CASE(ARMISD::VADDVps)
-    MAKE_CASE(ARMISD::VADDVpu)
-    MAKE_CASE(ARMISD::VADDLVs)
-    MAKE_CASE(ARMISD::VADDLVu)
-    MAKE_CASE(ARMISD::VADDLVAs)
-    MAKE_CASE(ARMISD::VADDLVAu)
-    MAKE_CASE(ARMISD::VADDLVps)
-    MAKE_CASE(ARMISD::VADDLVpu)
-    MAKE_CASE(ARMISD::VADDLVAps)
-    MAKE_CASE(ARMISD::VADDLVApu)
-    MAKE_CASE(ARMISD::VMLAVs)
-    MAKE_CASE(ARMISD::VMLAVu)
-    MAKE_CASE(ARMISD::VMLAVps)
-    MAKE_CASE(ARMISD::VMLAVpu)
-    MAKE_CASE(ARMISD::VMLALVs)
-    MAKE_CASE(ARMISD::VMLALVu)
-    MAKE_CASE(ARMISD::VMLALVps)
-    MAKE_CASE(ARMISD::VMLALVpu)
-    MAKE_CASE(ARMISD::VMLALVAs)
-    MAKE_CASE(ARMISD::VMLALVAu)
-    MAKE_CASE(ARMISD::VMLALVAps)
-    MAKE_CASE(ARMISD::VMLALVApu)
-    MAKE_CASE(ARMISD::VMINVu)
-    MAKE_CASE(ARMISD::VMINVs)
-    MAKE_CASE(ARMISD::VMAXVu)
-    MAKE_CASE(ARMISD::VMAXVs)
-    MAKE_CASE(ARMISD::UMAAL)
-    MAKE_CASE(ARMISD::UMLAL)
-    MAKE_CASE(ARMISD::SMLAL)
-    MAKE_CASE(ARMISD::SMLALBB)
-    MAKE_CASE(ARMISD::SMLALBT)
-    MAKE_CASE(ARMISD::SMLALTB)
-    MAKE_CASE(ARMISD::SMLALTT)
-    MAKE_CASE(ARMISD::SMULWB)
-    MAKE_CASE(ARMISD::SMULWT)
-    MAKE_CASE(ARMISD::SMLALD)
-    MAKE_CASE(ARMISD::SMLALDX)
-    MAKE_CASE(ARMISD::SMLSLD)
-    MAKE_CASE(ARMISD::SMLSLDX)
-    MAKE_CASE(ARMISD::SMMLAR)
-    MAKE_CASE(ARMISD::SMMLSR)
-    MAKE_CASE(ARMISD::QADD16b)
-    MAKE_CASE(ARMISD::QSUB16b)
-    MAKE_CASE(ARMISD::QADD8b)
-    MAKE_CASE(ARMISD::QSUB8b)
-    MAKE_CASE(ARMISD::UQADD16b)
-    MAKE_CASE(ARMISD::UQSUB16b)
-    MAKE_CASE(ARMISD::UQADD8b)
-    MAKE_CASE(ARMISD::UQSUB8b)
-    MAKE_CASE(ARMISD::BUILD_VECTOR)
-    MAKE_CASE(ARMISD::BFI)
-    MAKE_CASE(ARMISD::VORRIMM)
-    MAKE_CASE(ARMISD::VBICIMM)
-    MAKE_CASE(ARMISD::VBSP)
-    MAKE_CASE(ARMISD::MEMCPY)
-    MAKE_CASE(ARMISD::VLD1DUP)
-    MAKE_CASE(ARMISD::VLD2DUP)
-    MAKE_CASE(ARMISD::VLD3DUP)
-    MAKE_CASE(ARMISD::VLD4DUP)
-    MAKE_CASE(ARMISD::VLD1_UPD)
-    MAKE_CASE(ARMISD::VLD2_UPD)
-    MAKE_CASE(ARMISD::VLD3_UPD)
-    MAKE_CASE(ARMISD::VLD4_UPD)
-    MAKE_CASE(ARMISD::VLD1x2_UPD)
-    MAKE_CASE(ARMISD::VLD1x3_UPD)
-    MAKE_CASE(ARMISD::VLD1x4_UPD)
-    MAKE_CASE(ARMISD::VLD2LN_UPD)
-    MAKE_CASE(ARMISD::VLD3LN_UPD)
-    MAKE_CASE(ARMISD::VLD4LN_UPD)
-    MAKE_CASE(ARMISD::VLD1DUP_UPD)
-    MAKE_CASE(ARMISD::VLD2DUP_UPD)
-    MAKE_CASE(ARMISD::VLD3DUP_UPD)
-    MAKE_CASE(ARMISD::VLD4DUP_UPD)
-    MAKE_CASE(ARMISD::VST1_UPD)
-    MAKE_CASE(ARMISD::VST2_UPD)
-    MAKE_CASE(ARMISD::VST3_UPD)
-    MAKE_CASE(ARMISD::VST4_UPD)
-    MAKE_CASE(ARMISD::VST1x2_UPD)
-    MAKE_CASE(ARMISD::VST1x3_UPD)
-    MAKE_CASE(ARMISD::VST1x4_UPD)
-    MAKE_CASE(ARMISD::VST2LN_UPD)
-    MAKE_CASE(ARMISD::VST3LN_UPD)
-    MAKE_CASE(ARMISD::VST4LN_UPD)
-    MAKE_CASE(ARMISD::WLS)
-    MAKE_CASE(ARMISD::WLSSETUP)
-    MAKE_CASE(ARMISD::LE)
-    MAKE_CASE(ARMISD::LOOP_DEC)
-    MAKE_CASE(ARMISD::CSINV)
-    MAKE_CASE(ARMISD::CSNEG)
-    MAKE_CASE(ARMISD::CSINC)
-    MAKE_CASE(ARMISD::MEMCPYLOOP)
-    MAKE_CASE(ARMISD::MEMSETLOOP)
-#undef MAKE_CASE
-  }
-  return nullptr;
-}
-
 EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
   if (!VT.isVector())
@@ -3344,8 +3130,8 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
     return LowerInterruptReturn(RetOps, dl, DAG);
   }
 
-  ARMISD::NodeType RetNode = AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE :
-                             ARMISD::RET_GLUE;
+  unsigned RetNode =
+      AFI->isCmseNSEntryFunction() ? ARMISD::SERET_GLUE : ARMISD::RET_GLUE;
   return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
 }
 
@@ -4861,7 +4647,7 @@ SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     }
   }
 
-  ARMISD::NodeType CompareType;
+  unsigned CompareType;
   switch (CondCode) {
   default:
     CompareType = ARMISD::CMP;
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index bc2fec3c1bdb5..8191eb40a712a 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -51,319 +51,6 @@ class TargetMachine;
 class TargetRegisterInfo;
 class VectorType;
 
-  namespace ARMISD {
-
-    // ARM Specific DAG Nodes
-    enum NodeType : unsigned {
-      // Start the numbering where the builtin ops and target ops leave off.
-      FIRST_NUMBER = ISD::BUILTIN_OP_END,
-
-      Wrapper,    // Wrapper - A wrapper node for TargetConstantPool,
-                  // TargetExternalSymbol, and TargetGlobalAddress.
-      WrapperPIC, // WrapperPIC - A wrapper node for TargetGlobalAddress in
-                  // PIC mode.
-      WrapperJT,  // WrapperJT - A wrapper node for TargetJumpTable
-
-      // Add pseudo op to model memcpy for struct byval.
-      COPY_STRUCT_BYVAL,
-
-      CALL,        // Function call.
-      CALL_PRED,   // Function call that's predicable.
-      CALL_NOLINK, // Function call with branch not branch-and-link.
-      tSECALL,     // CMSE non-secure function call.
-      t2CALL_BTI,  // Thumb function call followed by BTI instruction.
-      BRCOND,      // Conditional branch.
-      BR_JT,       // Jumptable branch.
-      BR2_JT,      // Jumptable branch (2 level - jumptable entry is a jump).
-      RET_GLUE,    // Return with a flag operand.
-      SERET_GLUE,  // CMSE Entry function return with a flag operand.
-      INTRET_GLUE, // Interrupt return with an LR-offset and a flag operand.
-
-      PIC_ADD, // Add with a PC operand and a PIC label.
-
-      ASRL, // MVE long arithmetic shift right.
-      LSRL, // MVE long shift right.
-      LSLL, // MVE long shift left.
-
-      CMP,  // ARM compare instructions.
-      CMN,  // ARM CMN instructions.
-      CMPZ, // ARM compare that sets only Z flag.
-      CMPFP,    // ARM VFP compare instruction, sets FPSCR.
-      CMPFPE,   // ARM VFP signalling compare instruction, sets FPSCR.
-      CMPFPw0,  // ARM VFP compare against zero instruction, sets FPSCR.
-      CMPFPEw0, // ARM VFP signalling compare against zero instruction, sets
-                // FPSCR.
-      FMSTAT,   // ARM fmstat instruction.
-
-      CMOV, // ARM conditional move instructions.
-
-      SSAT, // Signed saturation
-      USAT, // Unsigned saturation
-
-      BCC_i64,
-
-      LSLS,  // Flag-setting shift left.
-      LSRS1, // Flag-setting logical shift right by one bit.
-      ASRS1, // Flag-setting arithmetic shift right by one bit.
-      RRX,   // Shift right one bit with carry in.
-
-      ADDC, // Add with carry
-      ADDE, // Add using carry
-      SUBC, // Sub with carry
-      SUBE, // Sub using carry
-
-      VMOVRRD, // double to two gprs.
-      VMOVDRR, // Two gprs to double.
-      VMOVSR,  // move gpr to single, used for f32 literal constructed in a gpr
-
-      EH_SJLJ_SETJMP,         // SjLj exception handling setjmp.
-      EH_SJLJ_LONGJMP,        // SjLj exception handling longjmp.
-      EH_SJLJ_SETUP_DISPATCH, // SjLj exception handling setup_dispatch.
-
-      TC_RETURN, // Tail call return pseudo.
-
-      THREAD_POINTER,
-
-      DYN_ALLOC, // Dynamic allocation on the stack.
-
-      MEMBARRIER_MCR, // Memory barrier (MCR)
-
-      PRELOAD, // Preload
-
-      WIN__CHKSTK, // Windows' __chkstk call to do stack probing.
-      WIN__DBZCHK, // Windows' divide by zero check
-
-      WLS, // Low-overhead loops, While Loop Start branch. See t2WhileLoopStart
-      WLSSETUP, // Setup for the iteration count of a WLS. See t2WhileLoopSetup.
-      LOOP_DEC, // Really a part of LE, performs the sub
-      LE,       // Low-overhead loops, Loop End
-
-      PREDICATE_CAST,  // Predicate cast for MVE i1 types
-      VECTOR_REG_CAST, // Reinterpret the current contents of a vector register
-
-      MVESEXT,  // Legalization aids for extending a vector into two/four vectors.
-      MVEZEXT,  // or truncating two/four vectors into one. Eventually becomes
-      MVETRUNC, // stack store/load sequence, if not optimized to anything else.
-
-      VCMP,  // Vector compare.
-      VCMPZ, // Vector compare to zero.
-      VTST,  // Vector test bits.
-
-      // Vector shift by vector
-      VSHLs, // ...left/right by signed
-      VSHLu, // ...left/right by unsigned
-
-      // Vector shift by immediate:
-      VSHLIMM,  // ...left
-      VSHRsIMM, // ...right (signed)
-      VSHRuIMM, // ...right (unsigned)
-
-      // Vector rounding shift by immediate:
-      VRSHRsIMM, // ...right (signed)
-      VRSHRuIMM, // ...right (unsigned)
-      VRSHRNIMM, // ...right narrow
-
-      // Vector saturating shift by immediate:
-      VQSHLsIMM,   // ...left (signed)
-      VQSHLuIMM,   // ...left (unsigned)
-      VQSHLsuIMM,  // ...left (signed to unsigned)
-      VQSHRNsIMM,  // ...right narrow (signed)
-      VQSHRNuIMM,  // ...right narrow (unsigned)
-      VQSHRNsuIMM, // ...right narrow (signed to unsigned)
-
-      // Vector saturating rounding shift by immediate:
-      VQRSHRNsIMM,  // ...right narrow (signed)
-      VQRSHRNuIMM,  // ...right narrow (unsigned)
-      VQRSHRNsuIMM, // ...right narrow (signed to unsigned)
-
-      // Vector shift and insert:
-      VSLIIMM, // ...left
-      VSRIIMM, // ...right
-
-      // Vector get lane (VMOV scalar to ARM core register)
-      // (These are used for 8- and 16-bit element types only.)
-      VGETLANEu, // zero-extend vector extract element
-      VGETLANEs, // sign-extend vector extract element
-
-      // Vector move immediate and move negated immediate:
-      VMOVIMM,
-      VMVNIMM,
-
-      // Vector move f32 immediate:
-      VMOVFPIMM,
-
-      // Move H <-> R, clearing top 16 bits
-      VMOVrh,
-      VMOVhr,
-
-      // Vector duplicate:
-      VDUP,
-      VDUPLANE,
-
-      // Vector shuffles:
-      VEXT,   // extract
-      VREV64, // reverse elements within 64-bit doublewords
-      VREV32, // reverse elements within 32-bit words
-      VREV16, // reverse elements within 16-bit halfwords
-      VZIP,   // zip (interleave)
-      VUZP,   // unzip (deinterleave)
-      VTRN,   // transpose
-      VTBL1,  // 1-register shuffle with mask
-      VTBL2,  // 2-register shuffle with mask
-      VMOVN,  // MVE vmovn
-
-      // MVE Saturating truncates
-      VQMOVNs, // Vector (V) Saturating (Q) Move and Narrow (N), signed (s)
-      VQMOVNu, // Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u)
-
-      // MVE float <> half converts
-      VCVTN, // MVE vcvt f32 -> f16, truncating into either the bottom or top
-             // lanes
-      VCVTL, // MVE vcvt f16 -> f32, extending from either the bottom or top lanes
-
-      // MVE VIDUP instruction, taking a start value and increment.
-      VIDUP,
-
-      // Vector multiply long:
-      VMULLs, // ...signed
-      VMULLu, // ...unsigned
-
-      VQDMULH, // MVE vqdmulh instruction
-
-      // MVE reductions
-      VADDVs,  // sign- or zero-extend the elements of a vector to i32,
-      VADDVu,  // add them all together, and return an i32 of their sum
-      VADDVps, // Same as VADDV[su] but with a v4i1 predicate mask
-      VADDVpu,
-      VADDLVs,  // sign- or zero-extend elements to i64 and sum, returning
-      VADDLVu,  // the low and high 32-bit halves of the sum
-      VADDLVAs, // Same as VADDLV[su] but also add an input accumulator
-      VADDLVAu, // provided as low and high halves
-      VADDLVps, // Same as VADDLV[su] but with a v4i1 predicate mask
-      VADDLVpu,
-      VADDLVAps, // Same as VADDLVp[su] but with a v4i1 predicate mask
-      VADDLVApu,
-      VMLAVs, // sign- or zero-extend the elements of two vectors to i32, multiply
-      VMLAVu, // them and add the results together, returning an i32 of the sum
-      VMLAVps, // Same as VMLAV[su] with a v4i1 predicate mask
-      VMLAVpu,
-      VMLALVs,  // Same as VMLAV but with i64, returning the low and
-      VMLALVu,  // high 32-bit halves of the sum
-      VMLALVps, // Same as VMLALV[su] with a v4i1 predicate mask
-      VMLALVpu,
-      VMLALVAs,  // Same as VMLALV but also add an input accumulator
-      VMLALVAu,  // provided as low and high halves
-      VMLALVAps, // Same as VMLALVA[su] with a v4i1 predicate mask
-      VMLALVApu,
-      VMINVu, // Find minimum unsigned value of a vector and register
-      VMINVs, // Find minimum signed value of a vector and register
-      VMAXVu, // Find maximum unsigned value of a vector and register
-      VMAXVs, // Find maximum signed value of a vector and register
-
-      SMULWB,  // Signed multiply word by half word, bottom
-      SMULWT,  // Signed multiply word by half word, top
-      UMLAL,   // 64bit Unsigned Accumulate Multiply
-      SMLAL,   // 64bit Signed Accumulate Multiply
-      UMAAL,   // 64-bit Unsigned Accumulate Accumulate Multiply
-      SMLALBB, // 64-bit signed accumulate multiply bottom, bottom 16
-      SMLALBT, // 64-bit signed accumulate multiply bottom, top 16
-      SMLALTB, // 64-bit signed accumulate multiply top, bottom 16
-      SMLALTT, // 64-bit signed accumulate multiply top, top 16
-      SMLALD,  // Signed multiply accumulate long dual
-      SMLALDX, // Signed multiply accumulate long dual exchange
-      SMLSLD,  // Signed multiply subtract long dual
-      SMLSLDX, // Signed multiply subtract long dual exchange
-      SMMLAR,  // Signed multiply long, round and add
-      SMMLSR,  // Signed multiply long, subtract and round
-
-      // Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b
-      // stands for.
-      QADD8b,
-      QSUB8b,
-      QADD16b,
-      QSUB16b,
-      UQADD8b,
-      UQSUB8b,
-      UQADD16b,
-      UQSUB16b,
-
-      // Operands of the standard BUILD_VECTOR node are not legalized, which
-      // is fine if BUILD_VECTORs are always lowered to shuffles or other
-      // operations, but for ARM some BUILD_VECTORs are legal as-is and their
-      // operands need to be legalized. Define an ARM-specific version of
-      // BUILD_VECTOR for this purpose.
-      BUILD_VECTOR,
-
-      // Bit-field insert
-      BFI,
-
-      // Vector OR with immediate
-      VORRIMM,
-      // Vector AND with NOT of immediate
-      VBICIMM,
-
-      // Pseudo vector bitwise select
-      VBSP,
-
-      // Pseudo-instruction representing a memory copy using ldm/stm
-      // instructions.
-      MEMCPY,
-
-      // Pseudo-instruction representing a memory copy using a tail predicated
-      // loop
-      MEMCPYLOOP,
-      // Pseudo-instruction representing a memset using a tail predicated
-      // loop
-      MEMSETLOOP,
-
-      // V8.1MMainline condition select
-      CSINV, // Conditional select invert.
-      CSNEG, // Conditional select negate.
-      CSINC, // Conditional select increment.
-
-      // Vector load N-element structure to all lanes:
-      FIRST_MEMORY_OPCODE,
-      VLD1DUP = FIRST_MEMORY_OPCODE,
-      VLD2DUP,
-      VLD3DUP,
-      VLD4DUP,
-
-      // NEON loads with post-increment base updates:
-      VLD1_UPD,
-      VLD2_UPD,
-      VLD3_UPD,
-      VLD4_UPD,
-      VLD2LN_UPD,
-      VLD3LN_UPD,
-      VLD4LN_UPD,
-      VLD1DUP_UPD,
-      VLD2DUP_UPD,
-      VLD3DUP_UPD,
-      VLD4DUP_UPD,
-      VLD1x2_UPD,
-      VLD1x3_UPD,
-      VLD1x4_UPD,
-
-      // NEON stores with post-increment base updates:
-      VST1_UPD,
-      VST2_UPD,
-      VST3_UPD,
-      VST4_UPD,
-      VST2LN_UPD,
-      VST3LN_UPD,
-      VST4LN_UPD,
-      VST1x2_UPD,
-      VST1x3_UPD,
-      VST1x4_UPD,
-
-      // Load/Store of dual registers
-      LDRD,
-      STRD,
-      LAST_MEMORY_OPCODE = STRD,
-    };
-
-  } // end namespace ARMISD
-
 namespace ARM {
   /// Possible values of current rounding mode, which is specified in bits
   /// 23:22 of FPSCR.
@@ -427,8 +114,6 @@ class VectorType;
     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                             SelectionDAG &DAG) const override;
 
-    const char *getTargetNodeName(unsigned Opcode) const override;
-
     bool isSelectSupported(SelectSupportKind Kind) const override {
      // ARM does not support scalar condition selects on vectors.
      return (Kind != ScalarCondVectorVal);
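The hand-written ARMISD::NodeType enum removed above is superseded by a TableGen-generated one. As a rough sketch only (not part of this patch; the -gen-sd-node-info emitter's exact output and enum name may differ), the GET_SDNODE_ENUM section of ARMGenSDNodeInfo.inc declares one enumerator per SDNode defined in the .td files, numbered from ISD::BUILTIN_OP_END and ending in the GENERATED_OPCODE_END sentinel that the remaining hand-written opcodes continue from:

// Hypothetical excerpt modeling the shape of the generated enum.
namespace llvm {
namespace ARMISD {

enum GenNodeType : unsigned {
  // One enumerator per SDNode described in the ARM .td files,
  // starting where the target-independent opcodes leave off.
  Wrapper = ISD::BUILTIN_OP_END,
  WrapperPIC,
  WrapperJT,
  // ...
  // Sentinel; ARMSelectionDAGInfo.h continues numbering from here
  // for the opcodes that have no .td description yet.
  GENERATED_OPCODE_END
};

} // namespace ARMISD
} // namespace llvm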
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index f7176a65d8163..ddc89415cfb20 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -40,7 +40,7 @@ def SDT_ARMCMov : SDTypeProfile<1, 4, [
   SDTCisVT<4, FlagsVT>, // in flags
 ]>;
 
-def SDT_ARMBrcond : SDTypeProfile<0, 2, [
+def SDT_ARMBrcond : SDTypeProfile<0, 3, [
   SDTCisVT<0, OtherVT>,    // target basic block
   SDTCisVT<1, CondCodeVT>, // condition code
   SDTCisVT<2, FlagsVT>,    // in flags
@@ -133,9 +133,16 @@ def SDT_ARMIntShiftParts : SDTypeProfile<2, 3, [SDTCisSameAs<0, 1>,
                                                 SDTCisInt<0>,
                                                 SDTCisInt<4>]>;
 
+// Signed multiply accumulate long dual
 def ARMSmlald : SDNode<"ARMISD::SMLALD", SDT_LongMac>;
+
+// Signed multiply accumulate long dual exchange
 def ARMSmlaldx : SDNode<"ARMISD::SMLALDX", SDT_LongMac>;
+
+// Signed multiply subtract long dual
 def ARMSmlsld : SDNode<"ARMISD::SMLSLD", SDT_LongMac>;
+
+// Signed multiply subtract long dual exchange
 def ARMSmlsldx : SDNode<"ARMISD::SMLSLDX", SDT_LongMac>;
 
 def SDT_ARMCSel : SDTypeProfile<1, 4, [
@@ -146,8 +153,13 @@ def SDT_ARMCSel : SDTypeProfile<1, 4, [
   SDTCisVT<3, FlagsVT> // in flags
 ]>;
 
+// Conditional select invert.
 def ARMcsinv : SDNode<"ARMISD::CSINV", SDT_ARMCSel>;
+
+// Conditional select negate.
 def ARMcsneg : SDNode<"ARMISD::CSNEG", SDT_ARMCSel>;
+
+// Conditional select increment.
 def ARMcsinc : SDNode<"ARMISD::CSINC", SDT_ARMCSel>;
 
 def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>,
@@ -155,110 +167,197 @@ def SDT_MulHSR : SDTypeProfile<1, 3, [SDTCisVT<0,i32>,
                                       SDTCisSameAs<0, 2>,
                                       SDTCisSameAs<0, 3>]>;
 
+// Signed multiply long, round and add
 def ARMsmmlar : SDNode<"ARMISD::SMMLAR", SDT_MulHSR>;
+
+// Signed multiply long, subtract and round
 def ARMsmmlsr : SDNode<"ARMISD::SMMLSR", SDT_MulHSR>;
 
-// Node definitions.
+
+// Wrapper - A wrapper node for TargetConstantPool,
+// TargetExternalSymbol, and TargetGlobalAddress.
 def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
+
+// WrapperPIC - A wrapper node for TargetGlobalAddress in
+// PIC mode.
 def ARMWrapperPIC : SDNode<"ARMISD::WrapperPIC", SDTIntUnaryOp>;
+
+// WrapperJT - A wrapper node for TargetJumpTable
 def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntUnaryOp>;
 
 def ARMcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_ARMCallSeqStart,
                               [SDNPHasChain, SDNPOutGlue]>;
 def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+// Add pseudo op to model memcpy for struct byval.
 def ARMcopystructbyval : SDNode<"ARMISD::COPY_STRUCT_BYVAL" ,
                                 SDT_ARMStructByVal,
                                 [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
                                  SDNPMayStore, SDNPMayLoad]>;
 
+// Function call.
 def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall,
                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                       SDNPVariadic]>;
+
+// Function call that's predicable.
 def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                            SDNPVariadic]>;
+
+// Function call with branch not branch-and-link.
 def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;
 
+// Return with a flag operand.
 def ARMretglue : SDNode<"ARMISD::RET_GLUE", SDTNone,
                         [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// CMSE Entry function return with a flag operand.
 def ARMseretglue : SDNode<"ARMISD::SERET_GLUE", SDTNone,
                           [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// Interrupt return with an LR-offset and a flag operand.
 def ARMintretglue : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall,
                            [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+// ARM conditional move instructions.
 def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov>;
 
+// Signed saturation
 def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
 
+// Unsigned saturation
 def ARMusat : SDNode<"ARMISD::USAT", SDTIntSatNoShOp, []>;
 
+// Conditional branch.
 def ARMbrcond : SDNode<"ARMISD::BRCOND", SDT_ARMBrcond, [SDNPHasChain]>;
 
+// Jumptable branch.
 def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT, [SDNPHasChain]>;
+
+// Jumptable branch (2 level - jumptable entry is a jump).
 def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT, [SDNPHasChain]>;
 
 def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64, [SDNPHasChain]>;
 
+// ARM compare instructions.
 def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp>;
 
+// ARM CMN instructions.
 def ARMcmn : SDNode<"ARMISD::CMN", SDT_ARMCmp>;
 
+// ARM compare that sets only Z flag.
 def ARMcmpZ : SDNode<"ARMISD::CMPZ", SDT_ARMCmp, [SDNPCommutative]>;
 
+// Add with a PC operand and a PIC label.
 def ARMpic_add : SDNode<"ARMISD::PIC_ADD", SDT_ARMPICAdd>;
 
+// MVE long arithmetic shift right.
 def ARMasrl : SDNode<"ARMISD::ASRL", SDT_ARMIntShiftParts, []>;
+
+// MVE long shift right.
 def ARMlsrl : SDNode<"ARMISD::LSRL", SDT_ARMIntShiftParts, []>;
+
+// MVE long shift left.
 def ARMlsll : SDNode<"ARMISD::LSLL", SDT_ARMIntShiftParts, []>;
 
+// Flag-setting logical shift right by one bit.
 def ARMlsrs1 : SDNode<"ARMISD::LSRS1", SDTIntUnaryOpWithFlagsOut>;
+
+// Flag-setting arithmetic shift right by one bit.
 def ARMasrs1 : SDNode<"ARMISD::ASRS1", SDTIntUnaryOpWithFlagsOut>;
+
+// Shift right one bit with carry in.
 def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOpWithFlagsIn>;
 
+// Add with carry
 def ARMaddc : SDNode<"ARMISD::ADDC", SDTBinaryArithWithFlags,
                      [SDNPCommutative]>;
+
+// Sub with carry
 def ARMsubc : SDNode<"ARMISD::SUBC", SDTBinaryArithWithFlags>;
+
+// Flag-setting shift left.
 def ARMlsls : SDNode<"ARMISD::LSLS", SDTBinaryArithWithFlags>;
+
+// Add using carry
 def ARMadde : SDNode<"ARMISD::ADDE", SDTBinaryArithWithFlagsInOut>;
+
+// Sub using carry
 def ARMsube : SDNode<"ARMISD::SUBE", SDTBinaryArithWithFlagsInOut>;
 
 def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
+
+// SjLj exception handling setjmp.
 def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP",
                               SDT_ARMEH_SJLJ_Setjmp,
                               [SDNPHasChain, SDNPSideEffect]>;
+
+// SjLj exception handling longjmp.
 def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP",
                                SDT_ARMEH_SJLJ_Longjmp,
                                [SDNPHasChain, SDNPSideEffect]>;
+
+// SjLj exception handling setup_dispatch.
 def ARMeh_sjlj_setup_dispatch: SDNode<"ARMISD::EH_SJLJ_SETUP_DISPATCH",
                                       SDT_ARMEH_SJLJ_SetupDispatch,
                                       [SDNPHasChain, SDNPSideEffect]>;
 
+// Memory barrier (MCR)
 def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
                               [SDNPHasChain, SDNPSideEffect]>;
+
+// Preload
 def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
                         [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
 
+// Tail call return pseudo.
 def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
                       [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 
+// Bit-field insert
 def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
 
+// Pseudo-instruction representing a memory copy using ldm/stm instructions.
 def ARMmemcopy : SDNode<"ARMISD::MEMCPY", SDT_ARMMEMCPY,
                         [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
                          SDNPMayStore, SDNPMayLoad]>;
 
+// Signed multiply word by half word, bottom
 def ARMsmulwb : SDNode<"ARMISD::SMULWB", SDTIntBinOp, []>;
+
+// Signed multiply word by half word, top
 def ARMsmulwt : SDNode<"ARMISD::SMULWT", SDTIntBinOp, []>;
+
+// 64bit Unsigned Accumulate Multiply
+def ARMumlal : SDNode<"ARMISD::UMLAL", SDT_LongMac>;
+
+// 64bit Signed Accumulate Multiply
+def ARMsmlal : SDNode<"ARMISD::SMLAL", SDT_LongMac>;
+
+// 64-bit Unsigned Accumulate Accumulate Multiply
+def ARMumaal : SDNode<"ARMISD::UMAAL", SDT_LongMac>;
+
+// 64-bit signed accumulate multiply bottom, bottom 16
 def ARMsmlalbb : SDNode<"ARMISD::SMLALBB", SDT_LongMac, []>;
+
+// 64-bit signed accumulate multiply bottom, top 16
 def ARMsmlalbt : SDNode<"ARMISD::SMLALBT", SDT_LongMac, []>;
+
+// 64-bit signed accumulate multiply top, bottom 16
 def ARMsmlaltb : SDNode<"ARMISD::SMLALTB", SDT_LongMac, []>;
+
+// 64-bit signed accumulate multiply top, top 16
 def ARMsmlaltt : SDNode<"ARMISD::SMLALTT", SDT_LongMac, []>;
 
+// Single Lane QADD8 and QADD16. Only the bottom lane. That's what the b
+// stands for.
 def ARMqadd8b  : SDNode<"ARMISD::QADD8b", SDT_ARMAnd, []>;
 def ARMqsub8b  : SDNode<"ARMISD::QSUB8b", SDT_ARMAnd, []>;
 def ARMqadd16b : SDNode<"ARMISD::QADD16b", SDT_ARMAnd, []>;
@@ -270,13 +369,15 @@ def ARMuqadd16b : SDNode<"ARMISD::UQADD16b", SDT_ARMAnd, []>;
 def ARMuqsub16b : SDNode<"ARMISD::UQSUB16b", SDT_ARMAnd, []>;
 
 def SDT_ARMldrd : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
-def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
-
 def SDT_ARMstrd : SDTypeProfile<0, 3, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
+
+// Load/Store of dual registers
+def ARMldrd : SDNode<"ARMISD::LDRD", SDT_ARMldrd, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 def ARMstrd : SDNode<"ARMISD::STRD", SDT_ARMstrd, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
 
 // Vector operations shared between NEON and MVE
 
+// Vector duplicate
 def ARMvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
 
 // VDUPLANE can produce a quad-register result from a double-register source,
@@ -287,40 +388,65 @@ def ARMvduplane : SDNode<"ARMISD::VDUPLANE",
 
 def SDTARMVIDUP : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisVT<1, i32>,
                                        SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
+// MVE VIDUP instruction, taking a start value and increment.
 def ARMvidup : SDNode<"ARMISD::VIDUP", SDTARMVIDUP>;
 
 def SDTARMVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>;
+
+// reverse elements within 64-bit doublewords
 def ARMvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
+
+// reverse elements within 32-bit words
 def ARMvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
+
+// reverse elements within 16-bit halfwords
 def ARMvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
 
 def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>,
                                         SDTCisVT<2, i32>]>;
+
+// Vector get lane (VMOV scalar to ARM core register)
+// (These are used for 8- and 16-bit element types only.)
 def ARMvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
 def ARMvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
 
 def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+
+// Vector move immediate and move negated immediate
 def ARMvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
 def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
+// Vector move f32 immediate
 def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;
 
 def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
                                          SDTCisVT<2, i32>]>;
+
+// Vector OR with immediate
 def ARMvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+
+// Vector AND with NOT of immediate
 def ARMvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
 
 def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
 def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                      SDTCisSameAs<0, 2>,]>;
+
+// Vector shift by immediate
 def ARMvshlImm : SDNode<"ARMISD::VSHLIMM", SDTARMVSHIMM>;
 def ARMvshrsImm : SDNode<"ARMISD::VSHRsIMM", SDTARMVSHIMM>;
 def ARMvshruImm : SDNode<"ARMISD::VSHRuIMM", SDTARMVSHIMM>;
+
+// Vector shift by vector
 def ARMvshls : SDNode<"ARMISD::VSHLs", SDTARMVSH>;
 def ARMvshlu : SDNode<"ARMISD::VSHLu", SDTARMVSH>;
 
 def SDTARMVMULL : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisSameAs<1, 2>]>;
+
+// Vector multiply long
 def ARMvmulls : SDNode<"ARMISD::VMULLs", SDTARMVMULL>;
 def ARMvmullu : SDNode<"ARMISD::VMULLu", SDTARMVMULL>;
 
@@ -328,9 +454,13 @@ def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>,
                                       SDTCisInt<3>]>;
 def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>;
 
+// Vector compare.
 def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>;
+
+// Vector compare to zero.
 def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>;
 
+// Reinterpret the current contents of a vector register
 // 'VECTOR_REG_CAST' is an operation that reinterprets the contents of a
 // vector register as a different vector type, without changing the contents of
 // the register. It differs from 'bitconvert' in that bitconvert reinterprets
@@ -5894,13 +6024,17 @@ def MSRbanked : ABI<0b0001, (outs), (ins banked_reg:$banked, GPRnopc:$Rn),
 // The main point of having separate instruction are extra unmodelled effects
 // (compared to ordinary calls) like stack pointer change.
 
+// Windows' __chkstk call to do stack probing.
 def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone,
                          [SDNPHasChain, SDNPSideEffect]>;
+
 let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP], hasNoSchedulingInfo = 1 in
   def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>;
 
+// Windows' divide by zero check
 def win__dbzchk : SDNode<"ARMISD::WIN__DBZCHK", SDT_WIN__DBZCHK,
                          [SDNPHasChain, SDNPSideEffect, SDNPOutGlue]>;
+
 let usesCustomInserter = 1, Defs = [CPSR], hasNoSchedulingInfo = 1 in
   def WIN__DBZCHK : PseudoInst<(outs), (ins tGPR:$divisor), NoItinerary,
                                [(win__dbzchk tGPR:$divisor)]>;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
index e24413465799f..98591fa3f5bd7 100644
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -683,8 +683,13 @@ class MVE_VADDV
 def SDTVecReduce : SDTypeProfile<1, 1, [ // VADDV
   SDTCisInt<0>, SDTCisVec<1>]>;
 def SDTVecReduceP : SDTypeProfile<1, 2, [ // VADDVp
   SDTCisInt<0>, SDTCisVec<1>, SDTCisVec<2>]>;
+
+// sign- or zero-extend the elements of a vector to i32,
+// add them all together, and return an i32 of their sum
 def ARMVADDVs : SDNode<"ARMISD::VADDVs", SDTVecReduce>;
 def ARMVADDVu : SDNode<"ARMISD::VADDVu", SDTVecReduce>;
+
+// Same as VADDV[su] but with a v4i1 predicate mask
 def ARMVADDVps : SDNode<"ARMISD::VADDVps", SDTVecReduceP>;
 def ARMVADDVpu : SDNode<"ARMISD::VADDVpu", SDTVecReduceP>;
 
@@ -806,9 +811,19 @@ multiclass MVE_VADDLV_A<MVEVectorVTInfo VTI> {
   defvar InstN = !cast<Instruction>(NAME # "no_acc");
   defvar letter = VTI.SuffixLetter;
+
+  // sign- or zero-extend elements to i64 and sum, returning
+  // the low and high 32-bit halves of the sum
   defvar ARMVADDLV = SDNode<"ARMISD::VADDLV" # letter, SDTVecReduceL>;
+
+  // Same as VADDLV[su] but also add an input accumulator
+  // provided as low and high halves
   defvar ARMVADDLVA = SDNode<"ARMISD::VADDLVA" # letter, SDTVecReduceLA>;
+
+  // Same as VADDLV[su] but with a v4i1 predicate mask
   defvar ARMVADDLVp = SDNode<"ARMISD::VADDLVp" # letter, SDTVecReduceLP>;
+
+  // Same as VADDLVp[su] but with a v4i1 predicate mask
   defvar ARMVADDLVAp = SDNode<"ARMISD::VADDLVAp" # letter, SDTVecReduceLPA>;
 
   let Predicates = [HasMVEInt] in {
@@ -943,9 +958,17 @@ multiclass MVE_VMINMAXV_ty<string iname, bit isMin, string intrBaseName> {
 def SDTVecReduceR : SDTypeProfile<1, 2, [ // Reduction of an integer and vector into an integer
   SDTCisInt<0>, SDTCisInt<1>, SDTCisVec<2>
 ]>;
+
+// Find minimum unsigned value of a vector and register
 def ARMVMINVu : SDNode<"ARMISD::VMINVu", SDTVecReduceR>;
+
+// Find minimum signed value of a vector and register
 def ARMVMINVs : SDNode<"ARMISD::VMINVs", SDTVecReduceR>;
+
+// Find maximum unsigned value of a vector and register
 def ARMVMAXVu : SDNode<"ARMISD::VMAXVu", SDTVecReduceR>;
+
+// Find maximum signed value of a vector and register
 def ARMVMAXVs : SDNode<"ARMISD::VMAXVs", SDTVecReduceR>;
 
 defm MVE_VMINV : MVE_VMINMAXV_ty<"vminv", 1, "int_arm_mve_minv">;
@@ -1146,16 +1169,31 @@ def SDTVecReduce2LAP : SDTypeProfile<2, 5, [ // VMLALVA
   SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>, SDTCisInt<3>,
   SDTCisVec<4>, SDTCisVec<5>, SDTCisVec<6>
 ]>;
+
+// sign- or zero-extend the elements of two vectors to i32, multiply
+// them and add the results together, returning an i32 of the sum
 def ARMVMLAVs : SDNode<"ARMISD::VMLAVs", SDTVecReduce2>;
 def ARMVMLAVu : SDNode<"ARMISD::VMLAVu", SDTVecReduce2>;
+
+// Same as VMLAV but with i64, returning the low and
+// high 32-bit halves of the sum
 def ARMVMLALVs : SDNode<"ARMISD::VMLALVs", SDTVecReduce2L>;
 def ARMVMLALVu : SDNode<"ARMISD::VMLALVu", SDTVecReduce2L>;
+
+// Same as VMLALV but also add an input accumulator
+// provided as low and high halves
 def ARMVMLALVAs : SDNode<"ARMISD::VMLALVAs", SDTVecReduce2LA>;
SDNode<"ARMISD::VMLALVAu", SDTVecReduce2LA>; + +// Same as VMLAV[su] with a v4i1 predicate mask def ARMVMLAVps : SDNode<"ARMISD::VMLAVps", SDTVecReduce2P>; def ARMVMLAVpu : SDNode<"ARMISD::VMLAVpu", SDTVecReduce2P>; + +// Same as VMLALV[su] with a v4i1 predicate mask def ARMVMLALVps : SDNode<"ARMISD::VMLALVps", SDTVecReduce2LP>; def ARMVMLALVpu : SDNode<"ARMISD::VMLALVpu", SDTVecReduce2LP>; + +// Same as VMLALVA[su] with a v4i1 predicate mask def ARMVMLALVAps : SDNode<"ARMISD::VMLALVAps", SDTVecReduce2LAP>; def ARMVMLALVApu : SDNode<"ARMISD::VMLALVApu", SDTVecReduce2LAP>; @@ -1997,6 +2035,7 @@ class MVE_VQxDMULH_Base size, bit rounding, let validForTailPredication = 1; } +// MVE vqdmulh instruction def MVEvqdmulh : SDNode<"ARMISD::VQDMULH", SDTIntBinOp>; multiclass MVE_VQxDMULH_m; } +// Predicate cast for MVE i1 types // Occasionally we need to cast between a i32 and a boolean vector, for // example when moving between rGPR and VPR.P0 as part of predicate vector // shuffles. We also sometimes need to cast between different predicate @@ -4810,6 +4850,7 @@ defm MVE_VQMOVNu32 : MVE_VxMOVxN_halves<"vqmovn", "u32", 0b1, 0b1, 0b01>; defm MVE_VQMOVUNs16 : MVE_VxMOVxN_halves<"vqmovun", "s16", 0b0, 0b0, 0b00>; defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>; +// MVE vmovn def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>; multiclass MVE_VMOVN_p; def SDTARMVMOVNQ : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisVec<2>, SDTCisVT<3, i32>]>; + +// Vector (V) Saturating (Q) Move and Narrow (N), signed (s) def MVEvqmovns : SDNode<"ARMISD::VQMOVNs", SDTARMVMOVNQ>; + +// Vector (V) Saturating (Q) Move and Narrow (N), unsigned (u) def MVEvqmovnu : SDNode<"ARMISD::VQMOVNu", SDTARMVMOVNQ>; let Predicates = [HasMVEInt] in { @@ -4938,7 +4983,11 @@ class MVE_VCVT_ff, SDTCisVec<1>, SDTCisVT<2, i32>]>; + +// MVE vcvt f32 -> f16, truncating into either the bottom or top lanes def MVEvcvtn : SDNode<"ARMISD::VCVTN", SDTARMVMOVNQ>; + +// MVE vcvt f16 -> f32, extending from either the bottom or top lanes def MVEvcvtl : SDNode<"ARMISD::VCVTL", SDTARMVCVTL>; multiclass MVE_VCVT_f2h_m { @@ -6865,6 +6914,9 @@ class MVE_WLSTP size> def SDT_MVEMEMCPYLOOPNODE : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisPtrTy<1>, SDTCisVT<2, i32>]>; + +// Pseudo-instruction representing a memory copy using a tail predicated +// loop def MVE_MEMCPYLOOPNODE : SDNode<"ARMISD::MEMCPYLOOP", SDT_MVEMEMCPYLOOPNODE, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; @@ -6877,6 +6929,9 @@ let usesCustomInserter = 1, hasNoSchedulingInfo = 1, Defs = [CPSR] in { def SDT_MVEMEMSETLOOPNODE : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisVT<1, v16i8>, SDTCisVT<2, i32>]>; + +// Pseudo-instruction representing a memset using a tail predicated +// loop def MVE_MEMSETLOOPNODE : SDNode<"ARMISD::MEMSETLOOP", SDT_MVEMEMSETLOOPNODE, [SDNPHasChain, SDNPMayStore, SDNPMayLoad]>; diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td index 37f0103363b9a..90e74a5f54f7b 100644 --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -475,6 +475,8 @@ def non_word_alignedstore : PatFrag<(ops node:$val, node:$ptr), //===----------------------------------------------------------------------===// def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; + +// Vector test bits. def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>; // Types for vector shift by immediates. 
The "SHX" version is for long and @@ -487,10 +489,12 @@ def SDTARMVSHINSIMM : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, def NEONvshrnImm : SDNode<"ARMISD::VSHRNIMM", SDTARMVSHXIMM>; +// Vector rounding shift by immediate def NEONvrshrsImm : SDNode<"ARMISD::VRSHRsIMM", SDTARMVSHIMM>; def NEONvrshruImm : SDNode<"ARMISD::VRSHRuIMM", SDTARMVSHIMM>; def NEONvrshrnImm : SDNode<"ARMISD::VRSHRNIMM", SDTARMVSHXIMM>; +// Vector saturating shift by immediate def NEONvqshlsImm : SDNode<"ARMISD::VQSHLsIMM", SDTARMVSHIMM>; def NEONvqshluImm : SDNode<"ARMISD::VQSHLuIMM", SDTARMVSHIMM>; def NEONvqshlsuImm : SDNode<"ARMISD::VQSHLsuIMM", SDTARMVSHIMM>; @@ -498,13 +502,16 @@ def NEONvqshrnsImm : SDNode<"ARMISD::VQSHRNsIMM", SDTARMVSHXIMM>; def NEONvqshrnuImm : SDNode<"ARMISD::VQSHRNuIMM", SDTARMVSHXIMM>; def NEONvqshrnsuImm : SDNode<"ARMISD::VQSHRNsuIMM", SDTARMVSHXIMM>; +// Vector saturating rounding shift by immediate def NEONvqrshrnsImm : SDNode<"ARMISD::VQRSHRNsIMM", SDTARMVSHXIMM>; def NEONvqrshrnuImm : SDNode<"ARMISD::VQRSHRNuIMM", SDTARMVSHXIMM>; def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>; +// Vector shift and insert def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>; def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>; +// Pseudo vector bitwise select def NEONvbsp : SDNode<"ARMISD::VBSP", SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0, 1>, @@ -518,15 +525,25 @@ def NEONvext : SDNode<"ARMISD::VEXT", SDTARMVEXT>; def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>; + +// zip (interleave) def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; + +// unzip (deinterleave) def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; + +// transpose def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; def SDTARMVTBL1 : SDTypeProfile<1, 2, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, SDTCisVT<2, v8i8>]>; def SDTARMVTBL2 : SDTypeProfile<1, 3, [SDTCisVT<0, v8i8>, SDTCisVT<1, v8i8>, SDTCisVT<2, v8i8>, SDTCisVT<3, v8i8>]>; + +// 1-register shuffle with mask def NEONvtbl1 : SDNode<"ARMISD::VTBL1", SDTARMVTBL1>; + +// 2-register shuffle with mask def NEONvtbl2 : SDNode<"ARMISD::VTBL2", SDTARMVTBL2>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb.td b/llvm/lib/Target/ARM/ARMInstrThumb.td index 0c5ea3e0fa8d5..55b0d9e1c01fc 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -14,6 +14,7 @@ // Thumb specific DAG Nodes. // +// CMSE non-secure function call. def ARMtsecall : SDNode<"ARMISD::tSECALL", SDT_ARMcall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index c229c8e4491df..317959c0342f7 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -5581,6 +5581,24 @@ class t2LOL let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB]; } +// Setup for the iteration count of a WLS. See t2WhileLoopSetup. +def arm_wlssetup + : SDNode<"ARMISD::WLSSETUP", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>>; + +// Low-overhead loops, While Loop Start branch. 
+def arm_wls : SDNode<"ARMISD::WLS",
+                     SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>,
+                     [SDNPHasChain]>;
+
+// Really a part of LE, performs the sub
+def arm_loop_dec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>;
+
+// Low-overhead loops, Loop End
+def arm_le : SDNode<"ARMISD::LE",
+                    SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>,
+                    [SDNPHasChain]>;
+
 let isNotDuplicable = 1 in {
 def t2WLS : t2LOL<(outs GPRlr:$LR),
                   (ins rGPR:$Rn, wlslabel_u11:$label),
@@ -5651,15 +5669,17 @@ def t2DoLoopStartTP :
 // valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations
 // into a t2WhileLoopStartLR (or expanded).
 def t2WhileLoopSetup :
-  t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, []>;
+  t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br,
+               [(set i32:$lr, (arm_wlssetup i32:$tc))]>;
 
 // A pseudo to represent the decrement in a low overhead loop. A t2LoopDec and
 // t2LoopEnd together represent a LE instruction. Ideally these are converted
 // to a t2LoopEndDec which is lowered as a single instruction.
 let hasSideEffects = 0 in
 def t2LoopDec :
-  t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size),
-               4, IIC_Br, []>, Sched<[WriteBr]>;
+  t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br,
+               [(set i32:$Rm, (arm_loop_dec i32:$Rn, timm:$size))]>,
+  Sched<[WriteBr]>;
 
 let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
 // The branch in a t2WhileLoopSetup/t2WhileLoopStart pair, eventually turned
@@ -5667,8 +5687,8 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in {
 def t2WhileLoopStart :
     t2PseudoInst<(outs),
                  (ins GPRlr:$tc, brtarget:$target),
-                 4, IIC_Br, []>,
-                 Sched<[WriteBr]>;
+                 4, IIC_Br, [(arm_wls i32:$tc, bb:$target)]>,
+    Sched<[WriteBr]>;
 
 // WhileLoopStartLR that sets up LR and branches on zero, equivalent to WLS. It
 // is lowered in the ARMLowOverheadLoops pass providing the branches are within
@@ -5690,8 +5710,9 @@ def t2WhileLoopStartTP :
 
 // t2LoopEnd - the branch half of a t2LoopDec/t2LoopEnd pair.
 def t2LoopEnd :
-  t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target),
-               8, IIC_Br, []>, Sched<[WriteBr]>;
+  t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target),
+               8, IIC_Br, [(arm_le i32:$tc, bb:$target)]>,
+  Sched<[WriteBr]>;
 
 // The combination of a t2LoopDec and t2LoopEnd, performing both the LR
 // decrement and branch as a single instruction. Is lowered to a LE or
@@ -5873,6 +5894,7 @@ def t2AUT : PACBTIHintSpaceUseInst<"aut", 0b00101101> {
   let hasSideEffects = 1;
 }
 
+// Thumb function call followed by BTI instruction.
 def ARMt2CallBTI : SDNode<"ARMISD::t2CALL_BTI", SDT_ARMcall,
                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                            SDNPVariadic]>;
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index e2cc97b7b4634..65c61c259d465 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -28,11 +28,20 @@ def SDT_VMOVRRD : SDTypeProfile<2, 1, [SDTCisVT<0, i32>, SDTCisSameAs<0, 1>,
 
 def SDT_VMOVSR : SDTypeProfile<1, 1, [SDTCisVT<0, f32>, SDTCisVT<1, i32>]>;
 
+// ARM VFP compare instruction, sets FPSCR.
 def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_CMPFP>;
+
+// ARM VFP compare against zero instruction, sets FPSCR.
 def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0", SDT_CMPFP0>;
+
+// ARM VFP signalling compare instruction, sets FPSCR.
 def arm_cmpfpe : SDNode<"ARMISD::CMPFPE", SDT_CMPFP>;
+
+// ARM VFP signalling compare against zero instruction, sets
+// FPSCR.
 def arm_cmpfpe0 : SDNode<"ARMISD::CMPFPEw0", SDT_CMPFP0>;
 
+// ARM fmstat instruction.
 def arm_fmstat : SDNode<"ARMISD::FMSTAT",
                         SDTypeProfile<1, 1, [
                           SDTCisVT<0, FlagsVT>, // out flags
@@ -40,12 +49,19 @@ def arm_fmstat : SDNode<"ARMISD::FMSTAT",
                         ]>
 >;
 
+// Two gprs to double.
 def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
+
+// double to two gprs.
 def arm_fmrrd : SDNode<"ARMISD::VMOVRRD", SDT_VMOVRRD>;
+
+// move gpr to single, used for f32 literal constructed in a gpr
 def arm_vmovsr : SDNode<"ARMISD::VMOVSR", SDT_VMOVSR>;
 
 def SDT_VMOVhr : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, i32>] >;
 def SDT_VMOVrh : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisFP<1>] >;
+
+// Move H <-> R, clearing top 16 bits
 def arm_vmovhr : SDNode<"ARMISD::VMOVhr", SDT_VMOVhr>;
 def arm_vmovrh : SDNode<"ARMISD::VMOVrh", SDT_VMOVrh>;
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
index bf7c962f02efc..0646c5a3634cb 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp
@@ -10,9 +10,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "ARMSelectionDAGInfo.h"
 #include "ARMTargetTransformInfo.h"
 #include "llvm/CodeGen/SelectionDAG.h"
 #include "llvm/Support/CommandLine.h"
+
+#define GET_SDNODE_DESC
+#include "ARMGenSDNodeInfo.inc"
+
 using namespace llvm;
 
 #define DEBUG_TYPE "arm-selectiondag-info"
@@ -30,9 +35,79 @@ static cl::opt<TPLoop::MemTransfer> EnableMemtransferTPLoop(
                "Allow (may be subject to certain conditions) "
                "conversion of memcpy to TP loop.")));
 
+ARMSelectionDAGInfo::ARMSelectionDAGInfo()
+    : SelectionDAGGenTargetInfo(ARMGenSDNodeInfo) {}
+
+const char *ARMSelectionDAGInfo::getTargetNodeName(unsigned Opcode) const {
+#define MAKE_CASE(V)                                                           \
+  case V:                                                                      \
+    return #V;
+
+  // These nodes don't have corresponding entries in *.td files yet.
+  switch (static_cast<ARMISD::NodeType>(Opcode)) {
+    MAKE_CASE(ARMISD::DYN_ALLOC)
+    MAKE_CASE(ARMISD::MVESEXT)
+    MAKE_CASE(ARMISD::MVEZEXT)
+    MAKE_CASE(ARMISD::MVETRUNC)
+    MAKE_CASE(ARMISD::BUILD_VECTOR)
+    MAKE_CASE(ARMISD::VLD1DUP)
+    MAKE_CASE(ARMISD::VLD2DUP)
+    MAKE_CASE(ARMISD::VLD3DUP)
+    MAKE_CASE(ARMISD::VLD4DUP)
+    MAKE_CASE(ARMISD::VLD1_UPD)
+    MAKE_CASE(ARMISD::VLD2_UPD)
+    MAKE_CASE(ARMISD::VLD3_UPD)
+    MAKE_CASE(ARMISD::VLD4_UPD)
+    MAKE_CASE(ARMISD::VLD1x2_UPD)
+    MAKE_CASE(ARMISD::VLD1x3_UPD)
+    MAKE_CASE(ARMISD::VLD1x4_UPD)
+    MAKE_CASE(ARMISD::VLD2LN_UPD)
+    MAKE_CASE(ARMISD::VLD3LN_UPD)
+    MAKE_CASE(ARMISD::VLD4LN_UPD)
+    MAKE_CASE(ARMISD::VLD1DUP_UPD)
+    MAKE_CASE(ARMISD::VLD2DUP_UPD)
+    MAKE_CASE(ARMISD::VLD3DUP_UPD)
+    MAKE_CASE(ARMISD::VLD4DUP_UPD)
+    MAKE_CASE(ARMISD::VST1_UPD)
+    MAKE_CASE(ARMISD::VST3_UPD)
+    MAKE_CASE(ARMISD::VST1x2_UPD)
+    MAKE_CASE(ARMISD::VST1x3_UPD)
+    MAKE_CASE(ARMISD::VST1x4_UPD)
+    MAKE_CASE(ARMISD::VST2LN_UPD)
+    MAKE_CASE(ARMISD::VST3LN_UPD)
+    MAKE_CASE(ARMISD::VST4LN_UPD)
+  }
+#undef MAKE_CASE
+
+  return SelectionDAGGenTargetInfo::getTargetNodeName(Opcode);
+}
+
 bool ARMSelectionDAGInfo::isTargetMemoryOpcode(unsigned Opcode) const {
-  return Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
-         Opcode <= ARMISD::LAST_MEMORY_OPCODE;
+  // These nodes don't have corresponding entries in *.td files yet.
+  if (Opcode >= ARMISD::FIRST_MEMORY_OPCODE &&
+      Opcode <= ARMISD::LAST_MEMORY_OPCODE)
+    return true;
+
+  return SelectionDAGGenTargetInfo::isTargetMemoryOpcode(Opcode);
+}
+
+void ARMSelectionDAGInfo::verifyTargetNode(const SelectionDAG &DAG,
+                                           const SDNode *N) const {
+  switch (N->getOpcode()) {
+  default:
+    break;
+  case ARMISD::WIN__DBZCHK:
+    // invalid number of results; expected 2, got 1
+  case ARMISD::WIN__CHKSTK:
+    // invalid number of results; expected 1, got 2
+  case ARMISD::COPY_STRUCT_BYVAL:
+    // invalid number of operands; expected 6, got 5
+  case ARMISD::MEMCPY:
+    // invalid number of operands; expected 5, got 4
+    return;
+  }
+
+  SelectionDAGGenTargetInfo::verifyTargetNode(DAG, N);
+}
 
 // Emit, if possible, a specialized version of the given Libcall. Typically this
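Taken together, the constructor, getTargetNodeName and isTargetMemoryOpcode above form a two-tier lookup: hand-written leftovers are tried first, then everything falls through to the generated tables. A minimal sketch of the resulting behaviour (illustrative only, not part of the patch; it assumes the generated enum provides ARMISD::Wrapper for the ARMWrapper SDNode defined in the .td files):

#include "ARMSelectionDAGInfo.h"
#include <cassert>

void checkArmSDNodeInfo() {
  llvm::ARMSelectionDAGInfo SDI;
  // Leftover opcode: answered by the MAKE_CASE switch above.
  assert(SDI.getTargetNodeName(llvm::ARMISD::VLD1DUP));
  // TableGen-described opcode: falls through to the generated name table.
  assert(SDI.getTargetNodeName(llvm::ARMISD::Wrapper));
  // Only the leftover NEON load/store range is handled here; memory
  // opcodes with .td descriptions are answered by the base class.
  assert(SDI.isTargetMemoryOpcode(llvm::ARMISD::VLD1DUP));
  assert(!SDI.isTargetMemoryOpcode(llvm::ARMISD::DYN_ALLOC));
}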
diff --git a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
index d68150e66567c..c731768a5ef68 100644
--- a/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
+++ b/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h
@@ -17,7 +17,62 @@
 #include "llvm/CodeGen/RuntimeLibcallUtil.h"
 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
 
+#define GET_SDNODE_ENUM
+#include "ARMGenSDNodeInfo.inc"
+
 namespace llvm {
+namespace ARMISD {
+
+enum NodeType : unsigned {
+  DYN_ALLOC = GENERATED_OPCODE_END, // Dynamic allocation on the stack.
+
+  MVESEXT,  // Legalization aids for extending a vector into two/four vectors.
+  MVEZEXT,  // or truncating two/four vectors into one. Eventually becomes
+  MVETRUNC, // stack store/load sequence, if not optimized to anything else.
+
+  // Operands of the standard BUILD_VECTOR node are not legalized, which
+  // is fine if BUILD_VECTORs are always lowered to shuffles or other
+  // operations, but for ARM some BUILD_VECTORs are legal as-is and their
+  // operands need to be legalized. Define an ARM-specific version of
+  // BUILD_VECTOR for this purpose.
+  BUILD_VECTOR,
+
+  // Vector load N-element structure to all lanes:
+  FIRST_MEMORY_OPCODE,
+  VLD1DUP = FIRST_MEMORY_OPCODE,
+  VLD2DUP,
+  VLD3DUP,
+  VLD4DUP,
+
+  // NEON loads with post-increment base updates:
+  VLD1_UPD,
+  VLD2_UPD,
+  VLD3_UPD,
+  VLD4_UPD,
+  VLD2LN_UPD,
+  VLD3LN_UPD,
+  VLD4LN_UPD,
+  VLD1DUP_UPD,
+  VLD2DUP_UPD,
+  VLD3DUP_UPD,
+  VLD4DUP_UPD,
+  VLD1x2_UPD,
+  VLD1x3_UPD,
+  VLD1x4_UPD,
+
+  // NEON stores with post-increment base updates:
+  VST1_UPD,
+  VST3_UPD,
+  VST2LN_UPD,
+  VST3LN_UPD,
+  VST4LN_UPD,
+  VST1x2_UPD,
+  VST1x3_UPD,
+  VST1x4_UPD,
+  LAST_MEMORY_OPCODE = VST1x4_UPD,
+};
+
+} // namespace ARMISD
 
 namespace ARM_AM {
   static inline ShiftOpc getShiftOpcForNode(unsigned Opcode) {
@@ -35,10 +90,17 @@ namespace ARM_AM {
   }
 } // end namespace ARM_AM
 
-class ARMSelectionDAGInfo : public SelectionDAGTargetInfo {
+class ARMSelectionDAGInfo : public SelectionDAGGenTargetInfo {
 public:
+  ARMSelectionDAGInfo();
+
+  const char *getTargetNodeName(unsigned Opcode) const override;
+
   bool isTargetMemoryOpcode(unsigned Opcode) const override;
 
+  void verifyTargetNode(const SelectionDAG &DAG,
+                        const SDNode *N) const override;
+
   SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
                                   SDValue Chain, SDValue Dst, SDValue Src,
                                   SDValue Size, Align Alignment,
@@ -65,7 +127,6 @@ class ARMSelectionDAGInfo : public SelectionDAGTargetInfo {
                           SDValue Size, unsigned Align,
                           RTLIB::Libcall LC) const;
 };
-
-}
+} // namespace llvm
 
 #endif
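The range test in isTargetMemoryOpcode() relies on the leftover memory opcodes staying contiguous between the two markers in the enum above. A compile-time guard one could keep next to this enum (illustrative only, not part of the patch; both equalities simply restate the enumerator definitions):

#include "ARMSelectionDAGInfo.h"

// VLD1DUP opens the leftover memory-opcode range and VST1x4_UPD closes it.
static_assert(llvm::ARMISD::VLD1DUP == llvm::ARMISD::FIRST_MEMORY_OPCODE,
              "VLD1DUP must anchor the leftover memory-opcode range");
static_assert(llvm::ARMISD::LAST_MEMORY_OPCODE == llvm::ARMISD::VST1x4_UPD,
              "VST1x4_UPD must close the leftover memory-opcode range");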
diff --git a/llvm/lib/Target/ARM/CMakeLists.txt b/llvm/lib/Target/ARM/CMakeLists.txt
index fa778cad4af8e..eb3ad01a54fb2 100644
--- a/llvm/lib/Target/ARM/CMakeLists.txt
+++ b/llvm/lib/Target/ARM/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter)
 tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
 tablegen(LLVM ARMGenRegisterBank.inc -gen-register-bank)
 tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info)
+tablegen(LLVM ARMGenSDNodeInfo.inc -gen-sd-node-info)
 tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget)
 tablegen(LLVM ARMGenSystemRegister.inc -gen-searchable-tables)
 
diff --git a/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp b/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp
index ca9afded0c0c4..c763da95fa455 100644
--- a/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp
+++ b/llvm/unittests/Target/ARM/ARMSelectionDAGTest.cpp
@@ -5,7 +5,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "ARMISelLowering.h"
+#include "ARMSelectionDAGInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/AsmParser/Parser.h"