@@ -3573,54 +3573,93 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
     assert(!UseMI.getOperand(0).getSubReg() && "Expected SSA form");
 
     Register DstReg = UseMI.getOperand(0).getReg();
-    unsigned OpSize = getOpSize(UseMI, 0);
-    bool Is16Bit = OpSize == 2;
-    bool Is64Bit = OpSize == 8;
-    bool isVGPRCopy = RI.isVGPR(*MRI, DstReg);
-    unsigned NewOpc = isVGPRCopy ? Is64Bit ? AMDGPU::V_MOV_B64_PSEUDO
-                                           : AMDGPU::V_MOV_B32_e32
-                                 : Is64Bit ? AMDGPU::S_MOV_B64_IMM_PSEUDO
-                                           : AMDGPU::S_MOV_B32;
-
-    std::optional<int64_t> SubRegImm =
-        extractSubregFromImm(Imm, UseMI.getOperand(1).getSubReg());
-
-    APInt Imm(Is64Bit ? 64 : 32, *SubRegImm,
-              /*isSigned=*/true, /*implicitTrunc=*/true);
-
-    if (RI.isAGPR(*MRI, DstReg)) {
-      if (Is64Bit || !isInlineConstant(Imm))
-        return false;
-      NewOpc = AMDGPU::V_ACCVGPR_WRITE_B32_e64;
-    }
+    Register UseSubReg = UseMI.getOperand(1).getSubReg();
+
+    const TargetRegisterClass *DstRC = RI.getRegClassForReg(*MRI, DstReg);
+
+    bool Is16Bit = UseSubReg != AMDGPU::NoSubRegister &&
+                   RI.getSubRegIdxSize(UseSubReg) == 16;
 
     if (Is16Bit) {
-      if (isVGPRCopy)
+      if (RI.hasVGPRs(DstRC))
         return false; // Do not clobber vgpr_hi16
 
-      if (DstReg.isVirtual() && UseMI.getOperand(0).getSubReg() != AMDGPU::lo16)
+      if (DstReg.isVirtual() && UseSubReg != AMDGPU::lo16)
        return false;
-
-      UseMI.getOperand(0).setSubReg(0);
-      if (DstReg.isPhysical()) {
-        DstReg = RI.get32BitRegister(DstReg);
-        UseMI.getOperand(0).setReg(DstReg);
-      }
-      assert(UseMI.getOperand(1).getReg().isVirtual());
     }
 
     MachineFunction *MF = UseMI.getMF();
-    const MCInstrDesc &NewMCID = get(NewOpc);
-    const TargetRegisterClass *NewDefRC = getRegClass(NewMCID, 0, &RI, *MF);
 
-    if (DstReg.isPhysical()) {
-      if (!NewDefRC->contains(DstReg))
-        return false;
-    } else if (!MRI->constrainRegClass(DstReg, NewDefRC))
+    unsigned NewOpc = AMDGPU::INSTRUCTION_LIST_END;
+    MCRegister MovDstPhysReg =
+        DstReg.isPhysical() ? DstReg.asMCReg() : MCRegister();
+
+    std::optional<int64_t> SubRegImm = extractSubregFromImm(Imm, UseSubReg);
+
+    // TODO: Try to fold with AMDGPU::V_MOV_B16_t16_e64
+    for (unsigned MovOp :
+         {AMDGPU::S_MOV_B32, AMDGPU::V_MOV_B32_e32, AMDGPU::S_MOV_B64,
+          AMDGPU::V_MOV_B64_PSEUDO, AMDGPU::V_ACCVGPR_WRITE_B32_e64}) {
+      const MCInstrDesc &MovDesc = get(MovOp);
+
+      const TargetRegisterClass *MovDstRC = getRegClass(MovDesc, 0, &RI, *MF);
+      if (Is16Bit) {
+        // We just need to find a correctly sized register class, so the
+        // subregister index compatibility doesn't matter since we're
+        // statically extracting the immediate value.
+        MovDstRC = RI.getMatchingSuperRegClass(MovDstRC, DstRC, AMDGPU::lo16);
+        if (!MovDstRC)
+          continue;
+
+        if (MovDstPhysReg) {
+          // FIXME: We probably should not do this. If there is a live value
+          // in the high half of the register, it will be corrupted.
+          MovDstPhysReg =
+              RI.getMatchingSuperReg(MovDstPhysReg, AMDGPU::lo16, MovDstRC);
+          if (!MovDstPhysReg)
+            continue;
+        }
+      }
+
+      // Result class isn't the right size, try the next instruction.
+      if (MovDstPhysReg) {
+        if (!MovDstRC->contains(MovDstPhysReg))
+          return false;
+      } else if (!MRI->constrainRegClass(DstReg, MovDstRC)) {
+        // TODO: This will be overly conservative in the case of 16-bit
+        // virtual SGPRs. We could hack up the virtual register uses to use
+        // a compatible 32-bit class.
+        continue;
+      }
+
+      const MCOperandInfo &OpInfo = MovDesc.operands()[1];
+
+      // Ensure the interpreted immediate value is a valid operand in the
+      // new mov.
+      //
+      // FIXME: isImmOperandLegal should have a form that doesn't require an
+      // existing MachineInstr or MachineOperand.
+      if (!RI.opCanUseLiteralConstant(OpInfo.OperandType) &&
+          !isInlineConstant(*SubRegImm, OpInfo.OperandType))
+        break;
+
+      NewOpc = MovOp;
+      break;
+    }
+
+    if (NewOpc == AMDGPU::INSTRUCTION_LIST_END)
      return false;
 
+    if (Is16Bit) {
+      UseMI.getOperand(0).setSubReg(AMDGPU::NoSubRegister);
+      if (MovDstPhysReg)
+        UseMI.getOperand(0).setReg(MovDstPhysReg);
+      assert(UseMI.getOperand(1).getReg().isVirtual());
+    }
+
+    const MCInstrDesc &NewMCID = get(NewOpc);
     UseMI.setDesc(NewMCID);
-    UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
+    UseMI.getOperand(1).ChangeToImmediate(*SubRegImm);
     UseMI.addImplicitDefUseOperands(*MF);
     return true;
   }
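For review context, here is a minimal standalone sketch of the semantics this patch assumes of `extractSubregFromImm`: the subregister index on the COPY's use operand selects which slice of the materialized 64-bit immediate is actually read. The enum values and the exact extension behavior below are illustrative stand-ins, not the generated AMDGPU definitions; check `SIInstrInfo` for the authoritative helper.

```cpp
#include <cstdint>
#include <optional>

// Illustrative stand-ins for the AMDGPU subregister indices; the real
// values come from the generated register info tables.
enum SubRegIdx : unsigned { NoSubRegister, lo16, hi16, sub0, sub1 };

// Simplified model of extractSubregFromImm: return the slice of the
// immediate that a use through SubRegIndex reads, widened to int64_t.
// (Assumption: the 16-bit slices are sign-extended here; verify against
// the real helper before relying on this.)
std::optional<int64_t> extractSubregFromImmModel(int64_t Imm,
                                                 unsigned SubRegIndex) {
  switch (SubRegIndex) {
  case NoSubRegister:
    return Imm;                              // whole-register use
  case sub0:
    return static_cast<uint32_t>(Imm);       // low 32 bits
  case sub1:
    return static_cast<uint32_t>(Imm >> 32); // high 32 bits
  case lo16:
    return static_cast<int16_t>(Imm);        // bits [15:0]
  case hi16:
    return static_cast<int16_t>(Imm >> 16);  // bits [31:16]
  default:
    return std::nullopt;                     // index not modeled here
  }
}
```

Because the slice is extracted statically, a `sub1` use of a 64-bit constant folds to an ordinary 32-bit mov of the high half, which is why the selection loop only needs a correctly sized mov destination, not a subregister-compatible one.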
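The structural change is that the mov opcode is no longer computed up front from the destination size and bank; the patch instead walks a fixed candidate list and keeps the first mov that is legal for both the destination register class and the immediate. Below is a schematic, self-contained model of that first-fit control flow. The bank/width/literal entries in the table are placeholder assumptions for illustration, not statements about the real encodings, which the actual code reads from `MCInstrDesc` and the register-class queries shown in the diff.

```cpp
#include <cstdint>

enum class Bank { SGPR, VGPR, AGPR };

// Placeholder description of a candidate mov. The real loop derives all of
// this from MCInstrDesc, getRegClass, and opCanUseLiteralConstant.
struct MovCandidate {
  const char *Name;
  Bank DstBank;       // register bank the mov writes
  unsigned DstBits;   // destination width in bits
  bool AllowsLiteral; // may the source be an arbitrary literal?
};

// Crude stand-in for the inline-constant check; the real
// SIInstrInfo::isInlineConstant also accepts certain FP bit patterns.
static bool isInlineImmModel(int64_t Imm) { return Imm >= -16 && Imm <= 64; }

// First-fit selection mirroring the loop in the diff: a register-class
// mismatch skips to the next candidate, but once a class-compatible mov
// cannot encode the immediate, the whole search gives up (break), exactly
// as in the patch.
const char *selectMovModel(Bank DstBank, unsigned DstBits, int64_t Imm) {
  static const MovCandidate Candidates[] = {
      {"S_MOV_B32", Bank::SGPR, 32, /*AllowsLiteral=*/true},
      {"V_MOV_B32_e32", Bank::VGPR, 32, /*AllowsLiteral=*/true},
      {"S_MOV_B64", Bank::SGPR, 64, /*AllowsLiteral=*/true},
      {"V_MOV_B64_PSEUDO", Bank::VGPR, 64, /*AllowsLiteral=*/true},
      {"V_ACCVGPR_WRITE_B32_e64", Bank::AGPR, 32, /*AllowsLiteral=*/false},
  };
  for (const MovCandidate &C : Candidates) {
    if (C.DstBank != DstBank || C.DstBits != DstBits)
      continue; // result class isn't right, try the next instruction
    if (!C.AllowsLiteral && !isInlineImmModel(Imm))
      break; // immediate not encodable; no later candidate is tried
    return C.Name;
  }
  return nullptr; // corresponds to NewOpc == INSTRUCTION_LIST_END
}
```

Note the asymmetry the model preserves: `continue` on a class mismatch versus `break` on an illegal immediate, so for example an AGPR destination with a non-inline constant fails the fold entirely rather than falling through to an unrelated mov.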