8
8
9
9
#include " SIFrameLowering.h"
10
10
#include " AMDGPU.h"
11
+ #include " AMDGPULaneMaskUtils.h"
11
12
#include " GCNSubtarget.h"
12
13
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
13
14
#include " SIMachineFunctionInfo.h"
@@ -984,6 +985,7 @@ void SIFrameLowering::emitCSRSpillStores(
984
985
const SIInstrInfo *TII = ST.getInstrInfo ();
985
986
const SIRegisterInfo &TRI = TII->getRegisterInfo ();
986
987
MachineRegisterInfo &MRI = MF.getRegInfo ();
988
+ const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get (ST);
987
989
988
990
// Spill Whole-Wave Mode VGPRs. Save only the inactive lanes of the scratch
989
991
// registers. However, save all lanes of callee-saved VGPRs. Due to this, we
@@ -1015,8 +1017,7 @@ void SIFrameLowering::emitCSRSpillStores(
1015
1017
StoreWWMRegisters (WWMScratchRegs);
1016
1018
1017
1019
auto EnableAllLanes = [&]() {
1018
- unsigned MovOpc = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1019
- BuildMI (MBB, MBBI, DL, TII->get (MovOpc), TRI.getExec ()).addImm (-1 );
1020
+ BuildMI (MBB, MBBI, DL, TII->get (LMC.MovOpc ), LMC.ExecReg ).addImm (-1 );
1020
1021
};
1021
1022
1022
1023
if (!WWMCalleeSavedRegs.empty ()) {
@@ -1043,8 +1044,7 @@ void SIFrameLowering::emitCSRSpillStores(
1043
1044
TII->getWholeWaveFunctionSetup (MF)->eraseFromParent ();
1044
1045
} else if (ScratchExecCopy) {
1045
1046
// FIXME: Split block and make terminator.
1046
- unsigned ExecMov = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1047
- BuildMI (MBB, MBBI, DL, TII->get (ExecMov), TRI.getExec ())
1047
+ BuildMI (MBB, MBBI, DL, TII->get (LMC.MovOpc ), LMC.ExecReg )
1048
1048
.addReg (ScratchExecCopy, RegState::Kill);
1049
1049
LiveUnits.addReg (ScratchExecCopy);
1050
1050
}
@@ -1092,6 +1092,7 @@ void SIFrameLowering::emitCSRSpillRestores(
1092
1092
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
1093
1093
const SIInstrInfo *TII = ST.getInstrInfo ();
1094
1094
const SIRegisterInfo &TRI = TII->getRegisterInfo ();
1095
+ const AMDGPU::LaneMaskConstants &LMC = AMDGPU::LaneMaskConstants::get (ST);
1095
1096
Register FramePtrReg = FuncInfo->getFrameOffsetReg ();
1096
1097
1097
1098
for (const auto &Spill : FuncInfo->getPrologEpilogSGPRSpills ()) {
@@ -1147,16 +1148,14 @@ void SIFrameLowering::emitCSRSpillRestores(
1147
1148
Register OrigExec = Return.getOperand (0 ).getReg ();
1148
1149
1149
1150
if (!WWMScratchRegs.empty ()) {
1150
- unsigned XorOpc = ST.isWave32 () ? AMDGPU::S_XOR_B32 : AMDGPU::S_XOR_B64;
1151
- BuildMI (MBB, MBBI, DL, TII->get (XorOpc), TRI.getExec ())
1151
+ BuildMI (MBB, MBBI, DL, TII->get (LMC.XorOpc ), LMC.ExecReg )
1152
1152
.addReg (OrigExec)
1153
1153
.addImm (-1 );
1154
1154
RestoreWWMRegisters (WWMScratchRegs);
1155
1155
}
1156
1156
1157
1157
// Restore original EXEC.
1158
- unsigned MovOpc = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1159
- BuildMI (MBB, MBBI, DL, TII->get (MovOpc), TRI.getExec ()).addReg (OrigExec);
1158
+ BuildMI (MBB, MBBI, DL, TII->get (LMC.MovOpc ), LMC.ExecReg ).addReg (OrigExec);
1160
1159
1161
1160
// Drop the first operand and update the opcode.
1162
1161
Return.removeOperand (0 );
@@ -1173,8 +1172,7 @@ void SIFrameLowering::emitCSRSpillRestores(
1173
1172
RestoreWWMRegisters (WWMScratchRegs);
1174
1173
if (!WWMCalleeSavedRegs.empty ()) {
1175
1174
if (ScratchExecCopy) {
1176
- unsigned MovOpc = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1177
- BuildMI (MBB, MBBI, DL, TII->get (MovOpc), TRI.getExec ()).addImm (-1 );
1175
+ BuildMI (MBB, MBBI, DL, TII->get (LMC.MovOpc ), LMC.ExecReg ).addImm (-1 );
1178
1176
} else {
1179
1177
ScratchExecCopy = buildScratchExecCopy (LiveUnits, MF, MBB, MBBI, DL,
1180
1178
/* IsProlog*/ false ,
@@ -1185,8 +1183,7 @@ void SIFrameLowering::emitCSRSpillRestores(
1185
1183
RestoreWWMRegisters (WWMCalleeSavedRegs);
1186
1184
if (ScratchExecCopy) {
1187
1185
// FIXME: Split block and make terminator.
1188
- unsigned ExecMov = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
1189
- BuildMI (MBB, MBBI, DL, TII->get (ExecMov), TRI.getExec ())
1186
+ BuildMI (MBB, MBBI, DL, TII->get (LMC.MovOpc ), LMC.ExecReg )
1190
1187
.addReg (ScratchExecCopy, RegState::Kill);
1191
1188
}
1192
1189
}
0 commit comments