@@ -2146,13 +2146,17 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
21462146 Register DstLo = RI.getSubReg (Dst, AMDGPU::sub0);
21472147 Register DstHi = RI.getSubReg (Dst, AMDGPU::sub1);
21482148
2149+ MachineRegisterInfo &MRI = MI.getMF ()->getRegInfo ();
2150+ auto *TRI = MRI.getTargetRegisterInfo ();
2151+ const MCInstrDesc &Mov64Desc = get (AMDGPU::V_MOV_B64_e32);
2152+ const TargetRegisterClass *Mov64RC =
2153+ getRegClass (Mov64Desc, /* OpNum=*/ 0 , TRI);
2154+
21492155 const MachineOperand &SrcOp = MI.getOperand (1 );
21502156 // FIXME: Will this work for 64-bit floating point immediates?
21512157 assert (!SrcOp.isFPImm ());
2152- MachineRegisterInfo &MRI = MI.getMF ()->getRegInfo ();
2153- const TargetRegisterClass *RC = RI.getRegClassForReg (MRI, Dst);
2154- if (ST.hasMovB64 () && RI.isProperlyAlignedRC (*RC)) {
2155- MI.setDesc (get (AMDGPU::V_MOV_B64_e32));
2158+ if (ST.hasMovB64 () && Mov64RC->contains (Dst)) {
2159+ MI.setDesc (Mov64Desc);
21562160 if (SrcOp.isReg () || isInlineConstant (MI, 1 ) ||
21572161 isUInt<32 >(SrcOp.getImm ()) || ST.has64BitLiterals ())
21582162 break ;
@@ -2161,18 +2165,22 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
21612165 APInt Imm (64 , SrcOp.getImm ());
21622166 APInt Lo (32 , Imm.getLoBits (32 ).getZExtValue ());
21632167 APInt Hi (32 , Imm.getHiBits (32 ).getZExtValue ());
2168+ const MCInstrDesc &PkMovDesc = get (AMDGPU::V_PK_MOV_B32);
2169+ const TargetRegisterClass *PkMovRC =
2170+ getRegClass (PkMovDesc, /* OpNum=*/ 0 , TRI);
2171+
21642172 if (ST.hasPkMovB32 () && Lo == Hi && isInlineConstant (Lo) &&
2165- RI. isProperlyAlignedRC (*RC )) {
2166- BuildMI (MBB, MI, DL, get (AMDGPU::V_PK_MOV_B32) , Dst)
2167- .addImm (SISrcMods::OP_SEL_1)
2168- .addImm (Lo.getSExtValue ())
2169- .addImm (SISrcMods::OP_SEL_1)
2170- .addImm (Lo.getSExtValue ())
2171- .addImm (0 ) // op_sel_lo
2172- .addImm (0 ) // op_sel_hi
2173- .addImm (0 ) // neg_lo
2174- .addImm (0 ) // neg_hi
2175- .addImm (0 ); // clamp
2173+ PkMovRC-> contains (Dst )) {
2174+ BuildMI (MBB, MI, DL, PkMovDesc , Dst)
2175+ .addImm (SISrcMods::OP_SEL_1)
2176+ .addImm (Lo.getSExtValue ())
2177+ .addImm (SISrcMods::OP_SEL_1)
2178+ .addImm (Lo.getSExtValue ())
2179+ .addImm (0 ) // op_sel_lo
2180+ .addImm (0 ) // op_sel_hi
2181+ .addImm (0 ) // neg_lo
2182+ .addImm (0 ) // neg_hi
2183+ .addImm (0 ); // clamp
21762184 } else {
21772185 BuildMI (MBB, MI, DL, get (AMDGPU::V_MOV_B32_e32), DstLo)
21782186 .addImm (Lo.getSExtValue ())
0 commit comments