@@ -2328,20 +2328,23 @@ SDValue NVPTXTargetLowering::LowerBUILD_VECTOR(SDValue Op,
23282328 // Lower non-const v4i8 vector as byte-wise constructed i32, which allows us
23292329 // to optimize calculation of constant parts.
23302330 if (VT == MVT::v4i8) {
2331- SDValue C8 = DAG.getConstant (8 , DL, MVT::i32 );
2332- SDValue E01 = DAG.getNode (
2333- NVPTXISD::BFI, DL, MVT::i32 ,
2334- DAG.getAnyExtOrTrunc (Op->getOperand (1 ), DL, MVT::i32 ),
2335- DAG.getAnyExtOrTrunc (Op->getOperand (0 ), DL, MVT::i32 ), C8, C8);
2336- SDValue E012 =
2337- DAG.getNode (NVPTXISD::BFI, DL, MVT::i32 ,
2338- DAG.getAnyExtOrTrunc (Op->getOperand (2 ), DL, MVT::i32 ),
2339- E01 , DAG.getConstant (16 , DL, MVT::i32 ), C8);
2340- SDValue E0123 =
2341- DAG.getNode (NVPTXISD::BFI, DL, MVT::i32 ,
2342- DAG.getAnyExtOrTrunc (Op->getOperand (3 ), DL, MVT::i32 ),
2343- E012 , DAG.getConstant (24 , DL, MVT::i32 ), C8);
2344- return DAG.getNode (ISD::BITCAST, DL, VT, E0123 );
2331+ SDValue PRMT__10 = DAG.getNode (
2332+ NVPTXISD::PRMT, DL, MVT::v4i8,
2333+ {DAG.getAnyExtOrTrunc (Op->getOperand (0 ), DL, MVT::i32 ),
2334+ DAG.getAnyExtOrTrunc (Op->getOperand (1 ), DL, MVT::i32 ),
2335+ DAG.getConstant (0x3340 , DL, MVT::i32 ),
2336+ DAG.getConstant (NVPTX::PTXPrmtMode::NONE, DL, MVT::i32 )});
2337+ SDValue PRMT_210 = DAG.getNode (
2338+ NVPTXISD::PRMT, DL, MVT::v4i8,
2339+ {PRMT__10, DAG.getAnyExtOrTrunc (Op->getOperand (2 ), DL, MVT::i32 ),
2340+ DAG.getConstant (0x3410 , DL, MVT::i32 ),
2341+ DAG.getConstant (NVPTX::PTXPrmtMode::NONE, DL, MVT::i32 )});
2342+ SDValue PRMT3210 = DAG.getNode (
2343+ NVPTXISD::PRMT, DL, MVT::v4i8,
2344+ {PRMT_210, DAG.getAnyExtOrTrunc (Op->getOperand (3 ), DL, MVT::i32 ),
2345+ DAG.getConstant (0x4210 , DL, MVT::i32 ),
2346+ DAG.getConstant (NVPTX::PTXPrmtMode::NONE, DL, MVT::i32 )});
2347+ return DAG.getNode (ISD::BITCAST, DL, VT, PRMT3210);
23452348 }
23462349 return Op;
23472350 }
0 commit comments