@@ -2332,20 +2332,23 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
23322332 // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us
23332333 // to optimize calculation of constant parts.
23342334 if (VT == MVT::v4i8) {
2335- SDValue C8 = DAG.getConstant (8 , DL, MVT::i32 );
2336- SDValue E01 = DAG.getNode (
2337- NVPTXISD::BFI, DL, MVT::i32 ,
2338- DAG.getAnyExtOrTrunc (Op->getOperand (1 ), DL, MVT::i32 ),
2339- DAG.getAnyExtOrTrunc (Op->getOperand (0 ), DL, MVT::i32 ), C8, C8);
2340- SDValue E012 =
2341- DAG.getNode (NVPTXISD::BFI, DL, MVT::i32 ,
2342- DAG.getAnyExtOrTrunc (Op->getOperand (2 ), DL, MVT::i32 ),
2343- E01 , DAG.getConstant (16 , DL, MVT::i32 ), C8);
2344- SDValue E0123 =
2345- DAG.getNode (NVPTXISD::BFI, DL, MVT::i32 ,
2346- DAG.getAnyExtOrTrunc (Op->getOperand (3 ), DL, MVT::i32 ),
2347- E012 , DAG.getConstant (24 , DL, MVT::i32 ), C8);
2348- return DAG.getNode (ISD::BITCAST, DL, VT, E0123 );
2335+ SDValue PRMT__10 = DAG.getNode (
2336+ NVPTXISD::PRMT, DL, MVT::v4i8,
2337+ {DAG.getAnyExtOrTrunc (Op->getOperand (0 ), DL, MVT::i32 ),
2338+ DAG.getAnyExtOrTrunc (Op->getOperand (1 ), DL, MVT::i32 ),
2339+ DAG.getConstant (0x3340 , DL, MVT::i32 ),
2340+ DAG.getConstant (NVPTX::PTXPrmtMode::NONE, DL, MVT::i32 )});
2341+ SDValue PRMT_210 = DAG.getNode (
2342+ NVPTXISD::PRMT, DL, MVT::v4i8,
2343+ {PRMT__10, DAG.getAnyExtOrTrunc (Op->getOperand (2 ), DL, MVT::i32 ),
2344+ DAG.getConstant (0x3410 , DL, MVT::i32 ),
2345+ DAG.getConstant (NVPTX::PTXPrmtMode::NONE, DL, MVT::i32 )});
2346+ SDValue PRMT3210 = DAG.getNode (
2347+ NVPTXISD::PRMT, DL, MVT::v4i8,
2348+ {PRMT_210, DAG.getAnyExtOrTrunc (Op->getOperand (3 ), DL, MVT::i32 ),
2349+ DAG.getConstant (0x4210 , DL, MVT::i32 ),
2350+ DAG.getConstant (NVPTX::PTXPrmtMode::NONE, DL, MVT::i32 )});
2351+ return DAG.getNode (ISD::BITCAST, DL, VT, PRMT3210);
23492352 }
23502353 return Op;
23512354 }
0 commit comments