@@ -2111,6 +2111,28 @@ SDValue NVPTXTargetLowering::LowerSTACKSAVE(SDValue Op,
21112111 return DAG.getMergeValues ({ASC, SDValue (SS.getNode (), 1 )}, DL);
21122112}
21132113
2114+ // By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
2115+ // (see LegalizeDAG.cpp). This is slow and uses local memory.
2116+ // We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
2117+ SDValue
2118+ NVPTXTargetLowering::LowerCONCAT_VECTORS (SDValue Op, SelectionDAG &DAG) const {
2119+ SDNode *Node = Op.getNode ();
2120+ SDLoc dl (Node);
2121+ SmallVector<SDValue, 8 > Ops;
2122+ unsigned NumOperands = Node->getNumOperands ();
2123+ for (unsigned i = 0 ; i < NumOperands; ++i) {
2124+ SDValue SubOp = Node->getOperand (i);
2125+ EVT VVT = SubOp.getNode ()->getValueType (0 );
2126+ EVT EltVT = VVT.getVectorElementType ();
2127+ unsigned NumSubElem = VVT.getVectorNumElements ();
2128+ for (unsigned j = 0 ; j < NumSubElem; ++j) {
2129+ Ops.push_back (DAG.getNode (ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
2130+ DAG.getIntPtrConstant (j, dl)));
2131+ }
2132+ }
2133+ return DAG.getBuildVector (Node->getValueType (0 ), dl, Ops);
2134+ }
2135+
21142136SDValue NVPTXTargetLowering::LowerBITCAST (SDValue Op, SelectionDAG &DAG) const {
21152137 // Handle bitcasting from v2i8 without hitting the default promotion
21162138 // strategy which goes through stack memory.
@@ -2824,6 +2846,8 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
28242846 return LowerINSERT_VECTOR_ELT (Op, DAG);
28252847 case ISD::VECTOR_SHUFFLE:
28262848 return LowerVECTOR_SHUFFLE (Op, DAG);
2849+ case ISD::CONCAT_VECTORS:
2850+ return LowerCONCAT_VECTORS (Op, DAG);
28272851 case ISD::STORE:
28282852 return LowerSTORE (Op, DAG);
28292853 case ISD::LOAD:
0 commit comments