@@ -2131,11 +2131,15 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     Register DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
     Register DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
 
+    const MCInstrDesc &Mov64Desc = get(AMDGPU::V_MOV_B64_e32);
+    const TargetRegisterClass *Mov64RC =
+        getRegClass(Mov64Desc, /*OpNum=*/0);
+
     const MachineOperand &SrcOp = MI.getOperand(1);
     // FIXME: Will this work for 64-bit floating point immediates?
     assert(!SrcOp.isFPImm());
-    if (ST.hasMovB64()) {
-      MI.setDesc(get(AMDGPU::V_MOV_B64_e32));
+    if (ST.hasMovB64() && Mov64RC->contains(Dst)) {
+      MI.setDesc(Mov64Desc);
       if (SrcOp.isReg() || isInlineConstant(MI, 1) ||
           isUInt<32>(SrcOp.getImm()) || ST.has64BitLiterals())
         break;
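Gloss on this hunk (my reading, not text from the patch): V_MOV_B64_e32 can only write a destination in its defined register class, so on subtargets that constrain 64-bit VGPR operands an ineligible Dst must not take this path; the new Mov64RC->contains(Dst) guard lets the expansion fall through to the 32-bit path instead. A minimal sketch of the same guard factored into a helper; canUseMovB64 is hypothetical and assumes the two-argument SIInstrInfo::getRegClass used in the patch:

// Hypothetical helper (not in the patch): true only when the real
// 64-bit mov exists on this subtarget and its def operand (operand 0)
// can legally hold Dst; otherwise the caller splits into 32-bit moves.
static bool canUseMovB64(const SIInstrInfo &TII, const GCNSubtarget &ST,
                         Register Dst) {
  if (!ST.hasMovB64())
    return false;
  const MCInstrDesc &Desc = TII.get(AMDGPU::V_MOV_B64_e32);
  const TargetRegisterClass *RC = TII.getRegClass(Desc, /*OpNum=*/0);
  return RC && RC->contains(Dst);
}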
@@ -2144,17 +2148,22 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
     APInt Imm(64, SrcOp.getImm());
     APInt Lo(32, Imm.getLoBits(32).getZExtValue());
     APInt Hi(32, Imm.getHiBits(32).getZExtValue());
-    if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo)) {
-      BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_MOV_B32), Dst)
-          .addImm(SISrcMods::OP_SEL_1)
-          .addImm(Lo.getSExtValue())
-          .addImm(SISrcMods::OP_SEL_1)
-          .addImm(Lo.getSExtValue())
-          .addImm(0) // op_sel_lo
-          .addImm(0) // op_sel_hi
-          .addImm(0) // neg_lo
-          .addImm(0) // neg_hi
-          .addImm(0); // clamp
+    const MCInstrDesc &PkMovDesc = get(AMDGPU::V_PK_MOV_B32);
+    const TargetRegisterClass *PkMovRC =
+        getRegClass(PkMovDesc, /*OpNum=*/0);
+
+    if (ST.hasPkMovB32() && Lo == Hi && isInlineConstant(Lo) &&
+        PkMovRC->contains(Dst)) {
+      BuildMI(MBB, MI, DL, PkMovDesc, Dst)
+          .addImm(SISrcMods::OP_SEL_1)
+          .addImm(Lo.getSExtValue())
+          .addImm(SISrcMods::OP_SEL_1)
+          .addImm(Lo.getSExtValue())
+          .addImm(0) // op_sel_lo
+          .addImm(0) // op_sel_hi
+          .addImm(0) // neg_lo
+          .addImm(0) // neg_hi
+          .addImm(0); // clamp
     } else {
       BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DstLo)
           .addImm(Lo.getSExtValue())
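For context (again my gloss): the packed path only fires for splat immediates whose two 32-bit halves are equal and encodable as an inline constant, and after this change additionally only when Dst is in V_PK_MOV_B32's destination class; everything else takes the per-half V_MOV_B32_e32 writes to DstLo/DstHi in the else branch. A worked example of the splitting, assuming the APInt code from the hunk:

// Example: SrcOp.getImm() == -1, i.e. 0xFFFFFFFFFFFFFFFF.
APInt Imm(64, uint64_t(-1));
APInt Lo(32, Imm.getLoBits(32).getZExtValue()); // 0xFFFFFFFF
APInt Hi(32, Imm.getHiBits(32).getZExtValue()); // 0xFFFFFFFF
// Lo == Hi, and -1 is a valid inline constant, so a single
// V_PK_MOV_B32 with OP_SEL_1 on both source modifiers replicates the
// value into both 32-bit halves of Dst -- provided Dst is in PkMovRC.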
@@ -5172,7 +5181,8 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     // aligned register constraint.
     // FIXME: We do not verify inline asm operands, but custom inline asm
     // verification is broken anyway
-    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO) {
+    if (ST.needsAlignedVGPRs() && Opcode != AMDGPU::AV_MOV_B64_IMM_PSEUDO &&
+        Opcode != AMDGPU::V_MOV_B64_PSEUDO) {
       const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
       if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
         if (const TargetRegisterClass *SubRC =
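Note (my paraphrase of the intent): because the expansion above now checks the destination class and falls back to two 32-bit moves, V_MOV_B64_PSEUDO can legitimately carry a destination that fails the aligned-VGPR constraint before expansion, so the verifier skips that check for it, just as it already did for AV_MOV_B64_IMM_PSEUDO. A sketch of the exemption as a predicate; the lambda is illustrative, not in the patch:

// Illustrative only: pseudos whose post-RA expansion copes with an
// unaligned 64-bit destination are exempt from the aligned-VGPR check.
auto IsAlignmentExemptPseudo = [](unsigned Opc) {
  return Opc == AMDGPU::AV_MOV_B64_IMM_PSEUDO ||
         Opc == AMDGPU::V_MOV_B64_PSEUDO;
};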