@@ -3437,6 +3437,30 @@ void SIInstrInfo::removeModOperands(MachineInstr &MI) const {
34373437 }
34383438}
34393439
3440+ std::optional<int64_t > SIInstrInfo::extractSubregFromImm (int64_t Imm,
3441+ unsigned SubRegIndex) {
3442+ switch (SubRegIndex) {
3443+ case AMDGPU::NoSubRegister:
3444+ return Imm;
3445+ case AMDGPU::sub0:
3446+ return Lo_32 (Imm);
3447+ case AMDGPU::sub1:
3448+ return Hi_32 (Imm);
3449+ case AMDGPU::lo16:
3450+ return SignExtend64<16 >(Imm);
3451+ case AMDGPU::hi16:
3452+ return SignExtend64<16 >(Imm >> 16 );
3453+ case AMDGPU::sub1_lo16:
3454+ return SignExtend64<16 >(Imm >> 32 );
3455+ case AMDGPU::sub1_hi16:
3456+ return SignExtend64<16 >(Imm >> 48 );
3457+ default :
3458+ return std::nullopt ;
3459+ }
3460+
3461+ llvm_unreachable (" covered subregister switch" );
3462+ }
3463+
34403464bool SIInstrInfo::foldImmediate (MachineInstr &UseMI, MachineInstr &DefMI,
34413465 Register Reg, MachineRegisterInfo *MRI) const {
34423466 if (!MRI->hasOneNonDBGUse (Reg))
@@ -3446,25 +3470,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
34463470 if (!getConstValDefinedInReg (DefMI, Reg, Imm))
34473471 return false ;
34483472
3449- auto getImmFor = [=](const MachineOperand &UseOp) -> int64_t {
3450- switch (UseOp.getSubReg ()) {
3451- default :
3452- return Imm;
3453- case AMDGPU::sub0:
3454- return Lo_32 (Imm);
3455- case AMDGPU::sub1:
3456- return Hi_32 (Imm);
3457- case AMDGPU::lo16:
3458- return SignExtend64<16 >(Imm);
3459- case AMDGPU::hi16:
3460- return SignExtend64<16 >(Imm >> 16 );
3461- case AMDGPU::sub1_lo16:
3462- return SignExtend64<16 >(Imm >> 32 );
3463- case AMDGPU::sub1_hi16:
3464- return SignExtend64<16 >(Imm >> 48 );
3465- }
3466- };
3467-
34683473 assert (!DefMI.getOperand (0 ).getSubReg () && " Expected SSA form" );
34693474
34703475 unsigned Opc = UseMI.getOpcode ();
@@ -3480,7 +3485,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
34803485 : AMDGPU::V_MOV_B32_e32
34813486 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
34823487 : AMDGPU::S_MOV_B32;
3483- APInt Imm (Is64Bit ? 64 : 32 , getImmFor (UseMI.getOperand (1 )),
3488+
3489+ std::optional<int64_t > SubRegImm =
3490+ extractSubregFromImm (Imm, UseMI.getOperand (1 ).getSubReg ());
3491+
3492+ APInt Imm (Is64Bit ? 64 : 32 , *SubRegImm,
34843493 /* isSigned=*/ true , /* implicitTrunc=*/ true );
34853494
34863495 if (RI.isAGPR (*MRI, DstReg)) {
@@ -3591,7 +3600,8 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35913600 if (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
35923601 return false ;
35933602
3594- const int64_t Imm = getImmFor (RegSrc == Src1 ? *Src0 : *Src1);
3603+ const std::optional<int64_t > SubRegImm = extractSubregFromImm (
3604+ Imm, RegSrc == Src1 ? Src0->getSubReg () : Src1->getSubReg ());
35953605
35963606 // FIXME: This would be a lot easier if we could return a new instruction
35973607 // instead of having to modify in place.
@@ -3608,7 +3618,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36083618 UseMI.untieRegOperand (
36093619 AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src2));
36103620
3611- Src1->ChangeToImmediate (Imm );
3621+ Src1->ChangeToImmediate (*SubRegImm );
36123622
36133623 removeModOperands (UseMI);
36143624 UseMI.setDesc (get (NewOpc));
@@ -3679,8 +3689,11 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36793689 UseMI.untieRegOperand (
36803690 AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src2));
36813691
3692+ const std::optional<int64_t > SubRegImm =
3693+ extractSubregFromImm (Imm, Src2->getSubReg ());
3694+
36823695 // ChangingToImmediate adds Src2 back to the instruction.
3683- Src2->ChangeToImmediate (getImmFor (*Src2) );
3696+ Src2->ChangeToImmediate (*SubRegImm );
36843697
36853698 // These come before src2.
36863699 removeModOperands (UseMI);
0 commit comments