@@ -797,6 +797,23 @@ int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
 
 int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                  int Idx) const {
+  switch (MI->getOpcode()) {
+  case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_U32_e64:
+  case AMDGPU::V_ADD_CO_U32_e32: {
+    int OtherIdx = Idx == 1 ? 2 : 1;
+    const MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+    return OtherOp.isImm() ? OtherOp.getImm() : 0;
+  }
+  case AMDGPU::V_ADD_CO_U32_e64: {
+    int OtherIdx = Idx == 2 ? 3 : 2;
+    const MachineOperand &OtherOp = MI->getOperand(OtherIdx);
+    return OtherOp.isImm() ? OtherOp.getImm() : 0;
+  }
+  default:
+    break;
+  }
+
   if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
     return 0;
 
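The index arithmetic in this hunk follows the explicit operand layouts of the add forms: the e32 encodings and V_ADD_U32_e64 keep their sources at operands 1 and 2 (V_ADD_CO_U32_e32 writes its carry to VCC implicitly), while V_ADD_CO_U32_e64 has an explicit carry-out def, shifting the sources to 2 and 3. A minimal standalone sketch of the "other source" selection, with hypothetical names rather than the LLVM API:

    #include <cassert>

    // Operand layouts assumed above:
    //   dst, src0, src1        -> sources at 1 and 2 (e32 forms, V_ADD_U32_e64)
    //   dst, carry, src0, src1 -> sources at 2 and 3 (V_ADD_CO_U32_e64)
    static int otherSrcIdx(bool HasExplicitCarryDef, int FIIdx) {
      if (HasExplicitCarryDef)
        return FIIdx == 2 ? 3 : 2;
      return FIIdx == 1 ? 2 : 1;
    }

    int main() {
      assert(otherSrcIdx(false, 1) == 2 && otherSrcIdx(false, 2) == 1);
      assert(otherSrcIdx(true, 2) == 3 && otherSrcIdx(true, 3) == 2);
    }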
@@ -809,7 +826,60 @@ int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
   return getScratchInstrOffset(MI);
 }
 
+static bool isFIPlusImmOrVGPR(const SIRegisterInfo &TRI,
+                              const MachineInstr &MI) {
+  assert(MI.getDesc().isAdd());
+  const MachineOperand &Src0 = MI.getOperand(1);
+  const MachineOperand &Src1 = MI.getOperand(2);
+
+  if (Src0.isFI()) {
+    return Src1.isImm() || (Src1.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
+                                                       Src1.getReg()));
+  }
+
+  if (Src1.isFI()) {
+    return Src0.isImm() || (Src0.isReg() && TRI.isVGPR(MI.getMF()->getRegInfo(),
+                                                       Src0.getReg()));
+  }
+
+  return false;
+}
+
 bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
+  // TODO: Handle v_add_co_u32, v_or_b32, v_and_b32 and scalar opcodes.
+  switch (MI->getOpcode()) {
+  case AMDGPU::V_ADD_U32_e32: {
+    // TODO: We could handle this but it requires work to avoid violating
+    // operand restrictions.
+    if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e32) < 2 &&
+        !isFIPlusImmOrVGPR(*this, *MI))
+      return false;
+    [[fallthrough]];
+  }
+  case AMDGPU::V_ADD_U32_e64:
+    // FIXME: This optimization is barely profitable with enableFlatScratch.
+    //
+    // Much of the benefit of the MUBUF handling is that we avoid duplicating
+    // the shift of the frame register, which isn't needed with scratch.
+    //
+    // materializeFrameBaseRegister doesn't know the register classes of the
+    // uses, and unconditionally uses an s_add_i32, which will end up using a
+    // copy for the vector uses.
+    return !ST.enableFlatScratch();
+  case AMDGPU::V_ADD_CO_U32_e32:
+    if (ST.getConstantBusLimit(AMDGPU::V_ADD_CO_U32_e32) < 2 &&
+        !isFIPlusImmOrVGPR(*this, *MI))
+      return false;
+    // We can't deal with the case where the carry out has a use (though this
+    // should never happen).
+    return MI->getOperand(3).isDead();
+  case AMDGPU::V_ADD_CO_U32_e64:
+    // TODO: Should we check use_empty instead?
+    return MI->getOperand(1).isDead();
+  default:
+    break;
+  }
+
   if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
     return false;
 
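The constant-bus gate above rejects folding on subtargets where a VOP2 add can read only one SGPR or literal, unless the add is already frame-index plus immediate or plus VGPR. A hedged standalone sketch of that decision, with illustrative names rather than the LLVM API:

    #include <cassert>

    // Gate from needsFrameBaseReg: with a constant bus limit below 2, only the
    // FI + imm and FI + VGPR shapes are accepted for the VOP2 forms.
    static bool vop2AddFoldOK(int ConstantBusLimit, bool OtherIsImm,
                              bool OtherIsVGPR) {
      return ConstantBusLimit >= 2 || OtherIsImm || OtherIsVGPR;
    }

    int main() {
      assert(vop2AddFoldOK(1, true, false));   // FI + literal
      assert(vop2AddFoldOK(1, false, true));   // FI + VGPR
      assert(!vop2AddFoldOK(1, false, false)); // FI + SGPR: too many reads
      assert(vop2AddFoldOK(2, false, false));  // limit 2: both reads fit
    }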
@@ -860,6 +930,8 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
       .addFrameIndex(FrameIdx);
 
   if (ST.enableFlatScratch()) {
+    // FIXME: Mark scc as dead.
+    // FIXME: Make sure scc isn't live in.
     BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
         .addReg(OffsetReg, RegState::Kill)
         .addReg(FIReg);
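Both FIXMEs concern SCC: S_ADD_I32 implicitly defines it, the flag result is unused here, and the chosen insertion point must not fall where SCC is live. A hedged sketch of what the first FIXME suggests, assuming MachineInstrBuilder::setOperandDead and that the implicit-def $scc is operand 3 of S_ADD_I32:

    BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
        .addReg(OffsetReg, RegState::Kill)
        .addReg(FIReg)
        .setOperandDead(3); // Dead scc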
@@ -877,6 +949,86 @@ Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
 void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                                        int64_t Offset) const {
   const SIInstrInfo *TII = ST.getInstrInfo();
+
+  switch (MI.getOpcode()) {
+  case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_CO_U32_e32: {
+    MachineOperand *FIOp = &MI.getOperand(2);
+    MachineOperand *ImmOp = &MI.getOperand(1);
+    if (!FIOp->isFI())
+      std::swap(FIOp, ImmOp);
+
+    if (!ImmOp->isImm()) {
+      assert(Offset == 0);
+      FIOp->ChangeToRegister(BaseReg, false);
+      TII->legalizeOperandsVOP2(MI.getMF()->getRegInfo(), MI);
+      return;
+    }
+
+    int64_t TotalOffset = ImmOp->getImm() + Offset;
+    if (TotalOffset == 0) {
+      MI.setDesc(TII->get(AMDGPU::COPY));
+      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
+        MI.removeOperand(I);
+
+      MI.getOperand(1).ChangeToRegister(BaseReg, false);
+      return;
+    }
+
+    ImmOp->setImm(TotalOffset);
+
+    MachineBasicBlock *MBB = MI.getParent();
+    MachineFunction *MF = MBB->getParent();
+    MachineRegisterInfo &MRI = MF->getRegInfo();
+
+    // FIXME: materializeFrameBaseRegister does not know the register class of
+    // the uses of the frame index, and assumes SGPR for enableFlatScratch. Emit
+    // a copy so we have a legal operand and hope the register coalescer can
+    // clean it up.
+    if (isSGPRReg(MRI, BaseReg)) {
+      Register BaseRegVGPR =
+          MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY), BaseRegVGPR)
+          .addReg(BaseReg);
+      MI.getOperand(2).ChangeToRegister(BaseRegVGPR, false);
+    } else {
+      MI.getOperand(2).ChangeToRegister(BaseReg, false);
+    }
+    return;
+  }
+  case AMDGPU::V_ADD_U32_e64:
+  case AMDGPU::V_ADD_CO_U32_e64: {
+    int Src0Idx = MI.getNumExplicitDefs();
+    MachineOperand *FIOp = &MI.getOperand(Src0Idx);
+    MachineOperand *ImmOp = &MI.getOperand(Src0Idx + 1);
+    if (!FIOp->isFI())
+      std::swap(FIOp, ImmOp);
+
+    if (!ImmOp->isImm()) {
+      FIOp->ChangeToRegister(BaseReg, false);
+      TII->legalizeOperandsVOP3(MI.getMF()->getRegInfo(), MI);
+      return;
+    }
+
+    int64_t TotalOffset = ImmOp->getImm() + Offset;
+    if (TotalOffset == 0) {
+      MI.setDesc(TII->get(AMDGPU::COPY));
+
+      for (unsigned I = MI.getNumOperands() - 1; I != 1; --I)
+        MI.removeOperand(I);
+
+      MI.getOperand(1).ChangeToRegister(BaseReg, false);
+    } else {
+      FIOp->ChangeToRegister(BaseReg, false);
+      ImmOp->setImm(TotalOffset);
+    }
+
+    return;
+  }
+  default:
+    break;
+  }
+
   bool IsFlat = TII->isFLATScratch(MI);
 
 #ifndef NDEBUG
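In both arms the resolved Offset is folded into the add's existing immediate, and a zero total collapses the add into a COPY, dropping the surplus operands from the back. A standalone model of just that fold decision (illustrative names, not the LLVM API):

    #include <cassert>
    #include <cstdint>

    enum class Rewrite { CopyFromBase, AddBasePlusImm };

    // Fold the frame offset into the instruction's immediate operand.
    static Rewrite foldFrameOffset(int64_t Imm, int64_t Offset, int64_t &Total) {
      Total = Imm + Offset;
      return Total == 0 ? Rewrite::CopyFromBase : Rewrite::AddBasePlusImm;
    }

    int main() {
      int64_t Total;
      // e.g. an add of -16 to %stack.0 with BaseReg = frame + 16 -> plain copy.
      assert(foldFrameOffset(-16, 16, Total) == Rewrite::CopyFromBase);
      assert(foldFrameOffset(8, 16, Total) == Rewrite::AddBasePlusImm &&
             Total == 24);
    }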
@@ -925,6 +1077,18 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
 bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                         Register BaseReg,
                                         int64_t Offset) const {
+
+  switch (MI->getOpcode()) {
+  case AMDGPU::V_ADD_U32_e32:
+  case AMDGPU::V_ADD_CO_U32_e32:
+    return true;
+  case AMDGPU::V_ADD_U32_e64:
+  case AMDGPU::V_ADD_CO_U32_e64:
+    return ST.hasVOP3Literal() || AMDGPU::isInlinableIntLiteral(Offset);
+  default:
+    break;
+  }
+
   if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
     return false;
 
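The split follows the encodings: a VOP2 (e32) source can always carry a 32-bit literal, so any offset is foldable, while the VOP3 (e64) forms only take a literal on subtargets with VOP3 literals; otherwise the offset must be an inline integer constant. A hedged standalone sketch (hypothetical name; -16..64 is the inline integer range that AMDGPU::isInlinableIntLiteral checks):

    #include <cassert>
    #include <cstdint>

    // e32 encodings always accept a 32-bit literal; e64 needs VOP3 literals
    // or an inline immediate.
    static bool addOffsetLegal(bool IsVOP3, bool HasVOP3Literal, int64_t Offset) {
      if (!IsVOP3)
        return true;
      return HasVOP3Literal || (Offset >= -16 && Offset <= 64);
    }

    int main() {
      assert(addOffsetLegal(false, false, 0x12345)); // e32: any literal
      assert(addOffsetLegal(true, false, 64) && !addOffsetLegal(true, false, 65));
      assert(addOffsetLegal(true, true, 0x12345));   // VOP3 literal support
    }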