@@ -3412,8 +3412,34 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
34123412 // Really a scalar input. Just select from the low half of the register to
34133413 // avoid packing.
34143414
3415- if (VecSize == 32 || VecSize == Lo.getValueSizeInBits ()) {
3415+ if (VecSize == Lo.getValueSizeInBits ()) {
34163416 Src = Lo;
3417+ } else if (VecSize == 32 ) {
3418+ if (!Subtarget->useRealTrue16Insts ()) {
3419+ Src = Lo;
3420+ } else {
3421+ SDLoc SL (In);
3422+
3423+ if (Lo->isDivergent ()) {
3424+ SDValue Undef =
3425+ SDValue (CurDAG->getMachineNode (TargetOpcode::IMPLICIT_DEF, SL,
3426+ Lo.getValueType ()),
3427+ 0 );
3428+ const SDValue Ops[] = {
3429+ CurDAG->getTargetConstant (AMDGPU::VGPR_32RegClassID, SL,
3430+ MVT::i32 ),
3431+ Lo, CurDAG->getTargetConstant (AMDGPU::lo16, SL, MVT::i16 ),
3432+ Undef, CurDAG->getTargetConstant (AMDGPU::hi16, SL, MVT::i16 )};
3433+
3434+ Src = SDValue (CurDAG->getMachineNode (TargetOpcode::REG_SEQUENCE, SL,
3435+ Src.getValueType (), Ops),
3436+ 0 );
3437+ } else {
3438+ Src = SDValue (CurDAG->getMachineNode (AMDGPU::S_MOV_B32, SL,
3439+ Src.getValueType (), Lo),
3440+ 0 );
3441+ }
3442+ }
34173443 } else {
34183444 assert (Lo.getValueSizeInBits () == 32 && VecSize == 64 );
34193445
0 commit comments