Skip to content

Commit 9647523

Browse files
rampitec authored and memfrob committed
[AMDGPU] Make some VOP1 instructions rematerializable
This is a pilot change to verify the logic. The rest will be done in the same way, at least for the rest of VOP1. Differential Revision: https://reviews.llvm.org/D105742
1 parent 4213204 commit 9647523

File tree

3 files changed

+587
-22
lines changed

3 files changed

+587
-22
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -107,23 +107,19 @@ static bool nodesHaveSameOperandValue(SDNode *N0, SDNode* N1, unsigned OpName) {
107107

108108
bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
109109
AAResults *AA) const {
110-
// TODO: The generic check fails for VALU instructions that should be
111-
// rematerializable due to implicit reads of exec. We really want all of the
112-
// generic logic for this except for this.
113-
switch (MI.getOpcode()) {
114-
case AMDGPU::V_MOV_B32_e32:
115-
case AMDGPU::V_MOV_B32_e64:
116-
case AMDGPU::V_MOV_B64_PSEUDO:
117-
case AMDGPU::V_ACCVGPR_READ_B32_e64:
118-
case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
119-
// No non-standard implicit operands.
120-
assert(MI.getDesc().getNumOperands() == 2);
121-
assert(MI.getDesc().getNumImplicitDefs() == 0);
122-
assert(MI.getDesc().getNumImplicitUses() == 1);
123-
return MI.getNumOperands() == 3;
124-
default:
125-
return false;
110+
if (isVOP1(MI) || isVOP3(MI) || isSDWA(MI)) {
111+
// Normally a VALU use of exec would block rematerialization, but it
112+
// is OK here to have an implicit exec read, as all VALU instructions do.
113+
// We really want all of the generic logic for this except for this.
114+
115+
// Another potential implicit use is mode register. The core logic of
116+
// the RA will not attempt rematerialization if mode is set anywhere
117+
// in the function, otherwise it is safe since mode is not changed.
118+
return !MI.hasImplicitDef() &&
119+
MI.getNumImplicitOperands() == MI.getDesc().getNumImplicitUses();
126120
}
121+
122+
return false;
127123
}
128124

129125
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,

llvm/lib/Target/AMDGPU/VOP1Instructions.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def V_READFIRSTLANE_B32 :
203203
let Inst{31-25} = 0x3f; //encoding
204204
}
205205

206+
let isReMaterializable = 1 in {
206207
let SchedRW = [WriteDoubleCvt] in {
207208
// OMod clears exceptions when set in this instruction
208209
defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64_SPECIAL_OMOD, fp_to_sint>;
@@ -246,6 +247,7 @@ defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32
246247
defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP1_F32_I32>;
247248
} // End ReadsModeReg = 0, mayRaiseFPException = 0
248249
} // End SchedRW = [WriteFloatCvt]
250+
} // End isReMaterializable = 1
249251

250252
let ReadsModeReg = 0, mayRaiseFPException = 0 in {
251253
defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP1_F32_I32, AMDGPUcvt_f32_ubyte0>;

0 commit comments

Comments
 (0)